sgspy 1.0.2__cp310-cp310-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. sgspy/__init__.py +82 -0
  2. sgspy/_sgs.cpython-310-x86_64-linux-gnu.so +0 -0
  3. sgspy/calculate/__init__.py +18 -0
  4. sgspy/calculate/pca/__init__.py +2 -0
  5. sgspy/calculate/pca/pca.py +158 -0
  6. sgspy/calculate/representation/__init__.py +2 -0
  7. sgspy/calculate/representation/representation.py +3 -0
  8. sgspy/sample/__init__.py +30 -0
  9. sgspy/sample/ahels/__init__.py +2 -0
  10. sgspy/sample/ahels/ahels.py +3 -0
  11. sgspy/sample/clhs/__init__.py +2 -0
  12. sgspy/sample/clhs/clhs.py +202 -0
  13. sgspy/sample/nc/__init__.py +2 -0
  14. sgspy/sample/nc/nc.py +3 -0
  15. sgspy/sample/srs/__init__.py +2 -0
  16. sgspy/sample/srs/srs.py +228 -0
  17. sgspy/sample/strat/__init__.py +2 -0
  18. sgspy/sample/strat/strat.py +394 -0
  19. sgspy/sample/systematic/__init__.py +2 -0
  20. sgspy/sample/systematic/systematic.py +233 -0
  21. sgspy/stratify/__init__.py +27 -0
  22. sgspy/stratify/breaks/__init__.py +2 -0
  23. sgspy/stratify/breaks/breaks.py +222 -0
  24. sgspy/stratify/kmeans/__init__.py +2 -0
  25. sgspy/stratify/kmeans/kmeans.py +3 -0
  26. sgspy/stratify/map/__init__.py +2 -0
  27. sgspy/stratify/map/map_stratifications.py +244 -0
  28. sgspy/stratify/poly/__init__.py +2 -0
  29. sgspy/stratify/poly/poly.py +170 -0
  30. sgspy/stratify/quantiles/__init__.py +2 -0
  31. sgspy/stratify/quantiles/quantiles.py +276 -0
  32. sgspy/utils/__init__.py +18 -0
  33. sgspy/utils/plot.py +143 -0
  34. sgspy/utils/raster.py +605 -0
  35. sgspy/utils/vector.py +268 -0
  36. sgspy-1.0.2.data/data/sgspy/libonedal.so.3 +0 -0
  37. sgspy-1.0.2.data/data/sgspy/proj.db +0 -0
  38. sgspy-1.0.2.dist-info/METADATA +13 -0
  39. sgspy-1.0.2.dist-info/RECORD +40 -0
  40. sgspy-1.0.2.dist-info/WHEEL +5 -0
sgspy/__init__.py ADDED
@@ -0,0 +1,82 @@
1
+ ##
2
+ # @defgroup user User Documentation
3
+ # This is the documentation describing how to use the Python functions within the sgsPy
4
+ # package. For information on the underlying C++ implementations, see the developer
5
+ # docs.
6
+ #
7
+ # The first step in any processing using the sgsPy package will be to initialize an instance
8
+ # of either sgspy.SpatialRaster or sgspy.SpatialVector. These are the primary data inputs to
9
+ # all sgs functions, and information on their use can be found in the 'utils' section.
10
+ #
11
+ # The processing functions are split into three different categories: calculate, stratify,
12
+ # and sample. @n
13
+ # The calculate section contains various helpful functions to assist in sampling
14
+ # but are not necessarily a specific stratification or sampling function. Right now,
15
+ # it only has 'pca' or principal component analysis. @n
16
+ # The stratify section has various stratification functions including stratification according
17
+ # to user defined breaks 'breaks', stratification according to polygons 'poly', stratification
18
+ # along quantiles 'quantiles', and a method for mapping multiple existing stratification outputs 'map'. @n
19
+ # The sample section has various sampling functions including simple random sampling 'srs', stratified
20
+ # random sampling 'strat', systematic sampling 'systematic', and conditioned Latin hypercube sampling 'clhs'. @n
21
+
22
+ import os
23
+ import sys
24
+ import platform
25
+ import ctypes
26
+
27
# Make the native libraries that the compiled _sgs extension depends on
# discoverable before any submodule imports it.
if platform.system() == 'Windows':
    vendored_lib_path = os.path.join(sys.prefix, "sgspy")
    lib_path = os.path.join(sys.prefix, "Library", "bin")
    os.add_dll_directory(vendored_lib_path)
    os.add_dll_directory(lib_path)

    # Prepend each directory to PATH unless it is already one of the PATH
    # entries. The comparison is made against whole entries rather than as a
    # substring of the PATH string, so an unrelated entry that merely
    # contains the directory name does not suppress the update. A missing
    # PATH variable is treated as empty instead of raising KeyError.
    for _dll_dir in (vendored_lib_path, lib_path):
        _current_path = os.environ.get('PATH', '')
        if _dll_dir not in _current_path.split(os.pathsep):
            if _current_path:
                os.environ['PATH'] = _dll_dir + os.pathsep + _current_path
            else:
                os.environ['PATH'] = _dll_dir

else:  # linux
    # this library goes missing at runtime if we don't do this
    # (loaded globally and eagerly so its symbols are visible to libraries
    # loaded afterwards)
    ctypes.CDLL(os.path.join(sys.prefix, 'lib', 'libtbb.so.12'), os.RTLD_GLOBAL | os.RTLD_NOW)
42
+
43
+ GIGABYTE = 1073741824
44
+
45
+ from . import utils
46
+ from . import calculate
47
+ from . import sample
48
+ from . import stratify
49
+
50
+ from .utils import (
51
+ SpatialRaster,
52
+ SpatialVector,
53
+ )
54
+
55
+ from .calculate import (
56
+ pca,
57
+ representation,
58
+ )
59
+
60
+ from .sample import (
61
+ ahels,
62
+ clhs,
63
+ nc,
64
+ srs,
65
+ strat,
66
+ systematic,
67
+ )
68
+
69
+ from .stratify import (
70
+ breaks,
71
+ kmeans,
72
+ poly,
73
+ quantiles,
74
+ map,
75
+ )
76
+
77
# Public API: the union of every subpackage's exported names. Sorted so the
# resulting order is stable between interpreter runs (iteration order of a
# set of strings is not).
__all__ = sorted(
    set(utils.__all__)
    | set(calculate.__all__)
    | set(sample.__all__)
    | set(stratify.__all__)
)
@@ -0,0 +1,18 @@
1
+ ##
2
+ # @defgroup user_calculate calculate
3
+ # @ingroup user
4
+ #
5
+ # documentation of additional calculation functions for sgsPy. At the moment just principal component analysis.
6
+
7
+ from . import (
8
+ pca,
9
+ representation,
10
+ )
11
+
12
+ from .pca import pca
13
+ from .representation import representation
14
+
15
+ __all__ = [
16
+ "pca",
17
+ "representation",
18
+ ]
@@ -0,0 +1,2 @@
1
+ from . import pca
2
+ from .pca import pca
@@ -0,0 +1,158 @@
1
+ # ******************************************************************************
2
+ #
3
+ # Project: sgs
4
+ # Purpose: principal component analysis (pca)
5
+ # Author: Joseph Meyer
6
+ # Date: October, 2025
7
+ #
8
+ # ******************************************************************************
9
+
10
+ ##
11
+ # @defgroup user_pca pca
12
+ # @ingroup user_calculate
13
+
14
+ import os
15
+ import sys
16
+ import site
17
+ import tempfile
18
+ from sgspy.utils import SpatialRaster
19
+
20
+ #ensure _sgs binary can be found
21
+ site_packages = list(filter(lambda x : 'site-packages' in x, site.getsitepackages()))[0]
22
+ sys.path.append(os.path.join(site_packages, "sgspy"))
23
+ sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
24
+ from _sgs import pca_cpp
25
+
26
+ GIGABYTE = 1073741824
27
+
28
+ ##
29
+ # @ingroup user_pca
30
+ # This function conducts principal component analysis on the given
31
+ # raster.
32
+ #
33
+ # A number of output components must be provided as an integer. This integer
34
+ # must be less than or equal to the total number of bands in the input raster,
35
+ # and will be the number of bands in the output raster.
36
+ # A filename may be given to specify an output file location, otherwise
37
+ # a virtual file type will be used. The driver_options parameter is
38
+ # used to specify creation options for the output raster.
39
+ # See options for the Gtiff driver here: https://gdal.org/en/stable/drivers/raster/gtiff.html#creation-options
40
+ #
41
+ # Principal components are calculated across all raster bands,
42
+ # along with mean and standard deviation of each raster band. The
43
+ # raster is both centered and scaled, then output values are calculated
44
+ # for each principal component.
45
+ #
46
+ # Examples
47
+ # --------------------
48
+ # rast = sgspy.SpatialRaster("raster.tif") @n
49
+ # pcomp = sgspy.calculate.pca(rast, 3)
50
+ #
51
+ # rast = sgspy.SpatialRaster("raster.tif") @n
52
+ # pcomp = sgspy.calculate.pca(rast, 2, filename="pca.tif", display_info=True)
53
+ #
54
+ # rast = sgspy.SpatialRaster("raster.tif") @n
55
+ # pcomp = sgspy.calculate.pca(rast, 1, filename="pca.tif", driver_options={"COMPRESS": "LZW"})
56
+ #
57
+ # Parameters
58
+ # --------------------
59
+ # rast : SpatialRaster @n
60
+ # raster data structure containing input raster bands @n @n
61
+ # num_comp : int @n
62
+ # the number of components @n @n
63
+ # filename : str @n
64
+ # output filename or '' if there should not be an output file @n @n
65
+ # display_info : bool @n
66
+ # whether to display principal component eigenvalues/eigenvectors @n @n
67
+ # driver_options : dict @n
68
+ # the creation options as defined by GDAL which will be passed when creating output files @n @n
69
+ #
70
+ # Returns
71
+ # --------------------
72
+ # a SpatialRaster object containing principal component bands
73
def pca(
    rast: SpatialRaster,
    num_comp: int,
    filename: str = '',
    display_info: bool = False,
    driver_options: dict | None = None
    ):
    # Run principal component analysis on 'rast' through the C++ binding
    # pca_cpp and wrap the result in a new SpatialRaster.
    #
    # Raises TypeError for wrongly typed arguments (including non-string
    # driver_options keys), RuntimeError if the raster has been closed, and
    # ValueError if num_comp is outside 1..len(rast.bands).

    # --- argument type validation (exact types, subclasses are rejected) ---
    if type(rast) is not SpatialRaster:
        raise TypeError("'rast' parameter must be of type sgspy.SpatialRaster.")

    if type(num_comp) is not int:
        raise TypeError("'num_comp' parameter must be of type int.")

    if type(filename) is not str:
        raise TypeError("'filename' parameter must be of type str.")

    if type(display_info) is not bool:
        raise TypeError("'display_info' parameter must be of type bool.")

    if driver_options is not None and type(driver_options) is not dict:
        raise TypeError("'driver_options' parameter, if given, must be of type dict.")

    if rast.closed:
        raise RuntimeError("the C++ object which the raster object wraps has been cleaned up and closed.")

    # ensure number of components is acceptable
    band_count = len(rast.bands)
    if num_comp <= 0 or num_comp > band_count:
        msg = f"the number of components must be greater than zero and less than or equal to the total number of raster bands ({band_count})."
        raise ValueError(msg)

    # ensure driver options keys are strings, and convert driver options values to strings
    driver_options_str = {}
    if driver_options:
        for key, val in driver_options.items():
            if type(key) is not str:
                raise TypeError("the key for all key/value pairs in the driver_options dict must be a string.")
            driver_options_str[key] = str(val)

    # determine whether the raster should be categorized as 'large' and thus
    # be processed in blocks: any single band of at least one gigabyte, or
    # more than four gigabytes across the bands
    large_raster = False
    raster_size_bytes = 0
    pixels_per_band = rast.height * rast.width
    for band_index in range(band_count):
        pixel_size = rast.cpp_raster.get_raster_band_type_size(band_index)
        band_size = pixels_per_band * pixel_size
        raster_size_bytes += band_size
        if band_size >= GIGABYTE:
            # one oversized band is enough, no need to look at the rest
            large_raster = True
            break

    large_raster = large_raster or (raster_size_bytes > GIGABYTE * 4)

    # A fresh temporary directory is handed to the C++ routine. While the
    # call is in flight the directory is recorded on the input raster.
    # NOTE(review): presumably so the input raster can clean the directory
    # up if pca_cpp raises -- confirm against SpatialRaster.
    temp_dir = tempfile.mkdtemp()
    rast.have_temp_dir = True
    rast.temp_dir = temp_dir

    [pcomp, eigenvectors, eigenvalues] = pca_cpp(
        rast.cpp_raster,
        num_comp,
        large_raster,
        temp_dir,
        filename,
        driver_options_str
    )

    if display_info:
        print('eigenvectors:')
        print(eigenvectors)
        print()
        print('eigenvalues:')
        print(eigenvalues)
        print()

    # The output raster takes over the temporary directory and the input
    # raster's flag is cleared again.
    pcomp_rast = SpatialRaster(pcomp)
    pcomp_rast.cpp_raster.set_temp_dir(temp_dir)
    rast.have_temp_dir = False
    # flagged as a temporary dataset only when no output file was requested
    # and the raster was processed as 'large'
    pcomp_rast.temp_dataset = filename == "" and large_raster
    pcomp_rast.filename = filename

    return pcomp_rast
@@ -0,0 +1,2 @@
1
+ from . import representation
2
+ from .representation import representation
@@ -0,0 +1,3 @@
1
def representation():
    # Placeholder for a function that has not been implemented yet.
    # Always raises NotImplementedError; the message names the function so
    # the caller does not have to rely on output printed to stdout.
    raise NotImplementedError("sgspy.calculate.representation is not implemented yet.")
@@ -0,0 +1,30 @@
1
+ ##
2
+ # @defgroup user_sample sample
3
+ # @ingroup user
4
+ #
5
+ # Documentation for the sampling functions.
6
+
7
+ from . import (
8
+ ahels,
9
+ clhs,
10
+ nc,
11
+ srs,
12
+ strat,
13
+ systematic,
14
+ )
15
+
16
+ from .ahels import ahels
17
+ from .clhs import clhs
18
+ from .nc import nc
19
+ from .srs import srs
20
+ from .strat import strat
21
+ from .systematic import systematic
22
+
23
+ __all__ = [
24
+ "ahels",
25
+ "clhs",
26
+ "nc",
27
+ "srs",
28
+ "strat",
29
+ "systematic",
30
+ ]
@@ -0,0 +1,2 @@
1
+ from . import ahels
2
+ from .ahels import ahels
@@ -0,0 +1,3 @@
1
def ahels():
    # Placeholder for a sampling method that has not been implemented yet.
    # Always raises NotImplementedError; the message names the function so
    # the caller does not have to rely on output printed to stdout.
    raise NotImplementedError("sgspy.sample.ahels is not implemented yet.")
@@ -0,0 +1,2 @@
1
+ from . import clhs
2
+ from .clhs import clhs
@@ -0,0 +1,202 @@
1
+ # ******************************************************************************
2
+ #
3
+ # Project: sgs
4
+ # Purpose: conditioned Latin hypercube sampling (clhs)
5
+ # Author: Joseph Meyer
6
+ # Date: June, 2025
7
+ #
8
+ # ******************************************************************************
9
+
10
+ ##
11
+ # @defgroup user_clhs clhs
12
+ # @ingroup user_sample
13
+
14
+ import os
15
+ import sys
16
+ import site
17
+ import tempfile
18
+ from typing import Optional
19
+
20
+ import numpy as np
21
+ import matplotlib.pyplot as plt
22
+
23
+ from sgspy.utils import (
24
+ SpatialRaster,
25
+ SpatialVector,
26
+ plot,
27
+ )
28
+
29
+ #ensure _sgs binary can be found
30
+ site_packages = list(filter(lambda x : 'site-packages' in x, site.getsitepackages()))[0]
31
+ sys.path.append(os.path.join(site_packages, "sgspy"))
32
+ sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
33
+ from _sgs import clhs_cpp
34
+
35
+ ##
36
+ # @ingroup user_clhs
37
+ # This function conducts Conditioned Latin Hypercube Sampling, see the following article for an
38
+ # in depth description of the method itself:
39
+ #
40
+ # Minasny, B. and McBratney, A.B. 2006. A conditioned Latin hypercube method
41
+ # for sampling in the presence of ancillary information. Computers and Geosciences, 32:1378-1388.
42
+ #
43
+ # The number of output samples is decided by the 'num_samples' parameter. The 'iterations' parameter
44
+ # indicates the number of iterations the simulated annealing portion of the clhs algorithm will undertake
45
+ # in the case where a perfect latin hypercube is not found. A higher number of iterations may result in
46
+ # a more representative sample, although the standard value recommended by Minasny and McBratney is 10000.
47
+ #
48
+ # The access parameter may be given to restrict the areas where sampling may occur. The algorithm will still
49
+ # attempt to find a latin hypercube representative across the entire feature space, not just the accessible
50
+ # pixels. The access vector may contain geometries of type LineString or MultiLineString. buff_outer specifies
51
+ # the buffer distance around the geometry which is allowed to be included in the sampling. buff_inner specifies
52
+ # the buffer distance around the geometry which is not allowed to be included in the sampling. buff_outer must
53
+ # be larger than buff_inner. For a multi layer vector, layer_name must be specified.
54
+ #
55
+ # The output is an object of type sgspy.SpatialVector which contains the chosen sample points.
56
+ #
57
+ # Examples
58
+ # --------------------
59
+ # rast = sgspy.SpatialRaster("raster.tif") @n
60
+ # samples = sgspy.sample.clhs(rast, num_samples=250)
61
+ #
62
+ # rast = sgspy.SpatialRaster("raster.tif") @n
63
+ # samples = sgspy.sample.clhs(rast, num_samples=250, plot=True, filename="clhs_samples.shp")
64
+ #
65
+ # rast = sgspy.SpatialRaster("raster.tif") @n
66
+ # access = sgspy.SpatialVector("access_network.shp") @n
67
+ # samples = sgspy.sample.clhs(rast, num_samples=200, access=access, buff_outer=300)
68
+ #
69
+ # rast = sgspy.SpatialRaster("raster.tif") @n
70
+ # access = sgspy.SpatialVector("access_network.shp") @n
71
+ # samples = sgspy.sample.clhs(rast, num_samples=200, access=access, buff_inner=50, buff_outer=300)
72
+ #
73
+ # Parameters
74
+ # --------------------
75
+ # rast : SpatialRaster @n
76
+ # raster data structure containing input raster bands @n @n
77
+ # num_samples : int @n
78
+ # the target number of samples @n @n
79
+ # iterations : int @n
80
+ # the number of iterations in the clhs algorithms @n @n
81
+ # access : SpatialVector @n
82
+ # a vector specifying an access network @n @n
83
+ # layer_name : str @n
84
+ # the layer within the access network which will be used for sampling @n @n
85
+ # buff_inner : int | float @n
86
+ # buffer boundary specifying distance from access geometries which CANNOT be sampled @n @n
87
+ # buff_outer : int | float @n
88
+ # buffer boundary specifying distance from access geometries which CAN be sampled @n @n
89
+ # plot : bool @n
90
+ # whether to plot the output samples or not @n @n
91
+ # filename : str @n
92
+ # the filename to write to, or '' if file should not be written @n @n
93
+ #
94
+ # Returns
95
+ # --------------------
96
+ # a SpatialVector object containing point geometries of sample locations
97
def clhs(
    rast: SpatialRaster,
    num_samples: int,
    iterations: int = 10000,
    access: Optional[SpatialVector] = None,
    layer_name: Optional[str] = None,
    buff_inner: Optional[int | float] = None,
    buff_outer: Optional[int | float] = None,
    plot: bool = False,
    filename: str = ''):
    # Run conditioned Latin hypercube sampling on 'rast' through the C++
    # binding clhs_cpp and return the samples as a SpatialVector.
    #
    # Raises TypeError for wrongly typed arguments, RuntimeError if the
    # raster has been closed, and ValueError for a non-positive num_samples
    # or an inconsistent access / layer_name / buffer combination.
    #
    # Note: the 'plot' parameter shadows the 'plot' name imported from
    # sgspy.utils for the duration of this function.

    # --- argument type validation (exact types, subclasses are rejected) ---
    if type(rast) is not SpatialRaster:
        raise TypeError("'rast' parameter must be of type sgspy.SpatialRaster.")

    if type(num_samples) is not int:
        raise TypeError("'num_samples' parameter must be of type int.")

    if type(iterations) is not int:
        raise TypeError("'iterations' parameter must be of type int.")

    if access is not None and type(access) is not SpatialVector:
        raise TypeError("'access' parameter, if given, must be of type sgspy.SpatialVector.")

    if layer_name is not None and type(layer_name) is not str:
        raise TypeError("'layer_name' parameter, if given, must be of type str.")

    if buff_inner is not None and type(buff_inner) not in [int, float]:
        raise TypeError("'buff_inner' parameter, if given, must be of type int or float.")

    if buff_outer is not None and type(buff_outer) not in [int, float]:
        raise TypeError("'buff_outer' parameter, if given, must be of type int or float.")

    if type(plot) is not bool:
        raise TypeError("'plot' parameter must be of type bool.")

    if type(filename) is not str:
        raise TypeError("'filename' parameter must be of type str.")

    if rast.closed:
        raise RuntimeError("the C++ object which the raster object wraps has been cleaned up and closed.")

    if num_samples < 1:
        raise ValueError("num_samples must be greater than 0")

    # Decide on presence explicitly instead of relying on the truthiness of
    # the SpatialVector object, mirroring the 'is not None' type check above.
    has_access = access is not None

    if has_access:
        # with a single-layer vector the layer name may be omitted
        if layer_name is None:
            if len(access.layers) > 1:
                raise ValueError("if there are multiple layers in the access vector, layer_name parameter must be passed.")
            layer_name = access.layers[0]

        if layer_name not in access.layers:
            raise ValueError("layer specified by 'layer_name' does not exist in the access vector")

        # a missing or negative inner buffer means "no excluded zone"
        if buff_inner is None or buff_inner < 0:
            buff_inner = 0

        # zero is rejected here as well, so the condition agrees with the
        # error message
        if buff_outer is None or buff_outer <= 0:
            raise ValueError("if an access vector is given, buff_outer must be a float greater than 0.")

        if buff_inner >= buff_outer:
            raise ValueError("buff_outer must be greater than buff_inner")

        access_vector = access.cpp_vector
    else:
        # placeholder values passed to clhs_cpp when no access network is used
        access_vector = None
        layer_name = ""
        buff_inner = -1
        buff_outer = -1

    # reuse the temporary directory already registered on the raster, or
    # create one and register it
    temp_dir = rast.cpp_raster.get_temp_dir()
    if temp_dir == "":
        temp_dir = tempfile.mkdtemp()
        rast.cpp_raster.set_temp_dir(temp_dir)

    [sample_coordinates, cpp_vector] = clhs_cpp(
        rast.cpp_raster,
        num_samples,
        iterations,
        access_vector,
        layer_name,
        buff_inner,
        buff_outer,
        plot,
        temp_dir,
        filename
    )

    # plot new vector if requested; plotting is best-effort, so a failure is
    # reported on stdout and the samples are still returned
    if plot:
        try:
            fig, ax = plt.subplots()
            first_band = rast.bands[0]
            rast.plot(ax, band=first_band)
            title = "samples on " + first_band

            if has_access:
                access.plot('LineString', ax)
                title += " with access"

            ax.plot(sample_coordinates[0], sample_coordinates[1], '.r')
            ax.set_title(label=title)
            plt.show()

        except Exception as e:
            print("unable to plot output: " + str(e))

    return SpatialVector(cpp_vector)
@@ -0,0 +1,2 @@
1
+ from . import nc
2
+ from .nc import nc
sgspy/sample/nc/nc.py ADDED
@@ -0,0 +1,3 @@
1
def nc():
    # Placeholder for a sampling method that has not been implemented yet.
    # Always raises NotImplementedError; the message names the function so
    # the caller does not have to rely on output printed to stdout.
    raise NotImplementedError("sgspy.sample.nc is not implemented yet.")
@@ -0,0 +1,2 @@
1
+ from . import srs
2
+ from .srs import srs