sgspy 1.0.1__cp310-cp310-manylinux_2_39_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. sgspy/__init__.py +82 -0
  2. sgspy/_sgs.cpython-310-x86_64-linux-gnu.so +0 -0
  3. sgspy/calculate/__init__.py +18 -0
  4. sgspy/calculate/pca/__init__.py +2 -0
  5. sgspy/calculate/pca/pca.py +152 -0
  6. sgspy/calculate/representation/__init__.py +2 -0
  7. sgspy/calculate/representation/representation.py +3 -0
  8. sgspy/sample/__init__.py +30 -0
  9. sgspy/sample/ahels/__init__.py +2 -0
  10. sgspy/sample/ahels/ahels.py +3 -0
  11. sgspy/sample/clhs/__init__.py +2 -0
  12. sgspy/sample/clhs/clhs.py +198 -0
  13. sgspy/sample/nc/__init__.py +2 -0
  14. sgspy/sample/nc/nc.py +3 -0
  15. sgspy/sample/srs/__init__.py +2 -0
  16. sgspy/sample/srs/srs.py +224 -0
  17. sgspy/sample/strat/__init__.py +2 -0
  18. sgspy/sample/strat/strat.py +390 -0
  19. sgspy/sample/systematic/__init__.py +2 -0
  20. sgspy/sample/systematic/systematic.py +229 -0
  21. sgspy/stratify/__init__.py +27 -0
  22. sgspy/stratify/breaks/__init__.py +2 -0
  23. sgspy/stratify/breaks/breaks.py +218 -0
  24. sgspy/stratify/kmeans/__init__.py +2 -0
  25. sgspy/stratify/kmeans/kmeans.py +3 -0
  26. sgspy/stratify/map/__init__.py +2 -0
  27. sgspy/stratify/map/map_stratifications.py +240 -0
  28. sgspy/stratify/poly/__init__.py +2 -0
  29. sgspy/stratify/poly/poly.py +166 -0
  30. sgspy/stratify/quantiles/__init__.py +2 -0
  31. sgspy/stratify/quantiles/quantiles.py +272 -0
  32. sgspy/utils/__init__.py +18 -0
  33. sgspy/utils/plot.py +143 -0
  34. sgspy/utils/raster.py +602 -0
  35. sgspy/utils/vector.py +262 -0
  36. sgspy-1.0.1.data/data/sgspy/libonedal.so.3 +0 -0
  37. sgspy-1.0.1.data/data/sgspy/proj.db +0 -0
  38. sgspy-1.0.1.dist-info/METADATA +13 -0
  39. sgspy-1.0.1.dist-info/RECORD +40 -0
  40. sgspy-1.0.1.dist-info/WHEEL +5 -0
sgspy/__init__.py ADDED
@@ -0,0 +1,82 @@
1
+ ##
2
+ # @defgroup user User Documentation
3
+ # This is the documentation describing how to use the Python functions within the sgsPy
4
+ # package. For information on the underlying C++ implementations, see the developer
5
+ # docs.
6
+ #
7
+ # The first step in any processing using the sgsPy package will be to initialize an instance
8
+ # of either sgspy.SpatialRaster or sgspy.SpatialVector. These are the primary data inputs to
9
+ # all sgs functions, and information on their use can be found in the 'utils' section.
10
+ #
11
+ # The processing functions are split into three different categories: calculate, stratify,
12
+ # and sample. @n
13
+ # The calculate section contains various helpful functions to assist in sampling
14
+ # but are not necessarily a specific stratification or sampling function. Right now,
15
+ # it only has 'pca' or principal component analysis. @n
16
+ # The stratify section has various stratification functions including stratification according
17
+ # to user defined breaks 'breaks', stratification according to polygons 'poly', stratification
18
+ # along quantiles 'quantiles', and a method for mapping multiple existing stratification outputs 'map'. @n
19
+ # The sample section has various sampling functions including simple random sampling 'srs', stratified
20
+ # random sampling 'strat', systematic sampling 'systematic', and conditioned Latin hypercube sampling 'clhs'. @n
21
+
22
+ import os
23
+ import sys
24
+ import platform
25
+ import ctypes
26
+
27
# Make the native libraries that the compiled extension depends on loadable
# before any submodule imports it.
if platform.system() == 'Windows':
    vendored_lib_path = os.path.join(sys.prefix, "sgspy")
    lib_path = os.path.join(sys.prefix, "Library", "bin")
    os.add_dll_directory(vendored_lib_path)
    os.add_dll_directory(lib_path)

    # Prepend each directory to PATH unless it is already one of the PATH
    # entries. Whole entries are compared (not a substring of the PATH string)
    # so that a directory which is merely a prefix of another entry is still
    # added, and a missing PATH variable is treated as empty instead of
    # raising KeyError.
    for _dll_dir in (vendored_lib_path, lib_path):
        _current_path = os.environ.get('PATH', '')
        if _dll_dir not in _current_path.split(os.pathsep):
            if _current_path:
                os.environ['PATH'] = _dll_dir + os.pathsep + _current_path
            else:
                os.environ['PATH'] = _dll_dir

else: #linux
    #this library goes missing at runtime if we don't do this
    ctypes.CDLL(os.path.join(sys.prefix, 'lib', 'libtbb.so.12'), os.RTLD_GLOBAL | os.RTLD_NOW)

# number of bytes in one gibibyte (2 ** 30)
GIGABYTE = 1073741824
44
+
45
+ from . import utils
46
+ from . import calculate
47
+ from . import sample
48
+ from . import stratify
49
+
50
+ from .utils import (
51
+ SpatialRaster,
52
+ SpatialVector,
53
+ )
54
+
55
+ from .calculate import (
56
+ pca,
57
+ representation,
58
+ )
59
+
60
+ from .sample import (
61
+ ahels,
62
+ clhs,
63
+ nc,
64
+ srs,
65
+ strat,
66
+ systematic,
67
+ )
68
+
69
+ from .stratify import (
70
+ breaks,
71
+ kmeans,
72
+ poly,
73
+ quantiles,
74
+ map,
75
+ )
76
+
77
# Public names of the package: the union of what each subpackage exports.
# sorted() returns a list, like the previous list(...) call, but with an order
# that is identical on every interpreter run; iterating a set of strings is
# otherwise subject to hash randomization.
__all__ = sorted(
    set(utils.__all__) |
    set(calculate.__all__) |
    set(sample.__all__) |
    set(stratify.__all__)
)
@@ -0,0 +1,18 @@
1
+ ##
2
+ # @defgroup user_calculate calculate
3
+ # @ingroup user
4
+ #
5
+ # documentation of additional calculation functions for sgsPy. At the moment just principal component analysis.
6
+
7
+ from . import (
8
+ pca,
9
+ representation,
10
+ )
11
+
12
+ from .pca import pca
13
+ from .representation import representation
14
+
15
+ __all__ = [
16
+ "pca",
17
+ "representation",
18
+ ]
@@ -0,0 +1,2 @@
1
+ from . import pca
2
+ from .pca import pca
@@ -0,0 +1,152 @@
1
+ # ******************************************************************************
2
+ #
3
+ # Project: sgs
4
+ # Purpose: principal component analysis (pca)
5
+ # Author: Joseph Meyer
6
+ # Date: October, 2025
7
+ #
8
+ # ******************************************************************************
9
+
10
+ ##
11
+ # @defgroup user_pca pca
12
+ # @ingroup user_calculate
13
+
14
+ import os
15
+ import sys
16
+ import tempfile
17
+ from sgspy.utils import SpatialRaster
18
+
19
+ sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
20
+ from _sgs import pca_cpp
21
+
22
+ GIGABYTE = 1073741824
23
+
24
+ ##
25
+ # @ingroup user_pca
26
+ # This function conducts principal component analysis on the given
27
+ # raster.
28
+ #
29
+ # A number of output components must be provided as an integer. This integer
30
+ # must be less than or equal to the total number of bands in the input raster,
31
+ # and will be the number of bands in the output raster.
32
+ # A filename may be given to specify an output file location, otherwise
33
+ # a virtual file type will be used. The driver_options parameter is
34
+ # used to specify creation options for the output raster.
35
+ # See options for the Gtiff driver here: https://gdal.org/en/stable/drivers/raster/gtiff.html#creation-options
36
+ #
37
+ # Principal components are calculated across all raster bands,
38
+ # along with mean and standard deviation of each raster band. The
39
+ # raster is both centered and scaled, then output values are calculated
40
+ # for each principal component.
41
+ #
42
+ # Examples
43
+ # --------------------
44
+ # rast = sgspy.SpatialRaster("raster.tif") @n
45
+ # pcomp = sgspy.calculate.pca(rast, 3)
46
+ #
47
+ # rast = sgspy.SpatialRaster("raster.tif") @n
48
+ # pcomp = sgspy.calculate.pca(rast, 2, filename="pca.tif", display_info=True)
49
+ #
50
+ # rast = sgspy.SpatialRaster("raster.tif") @n
51
+ # pcomp = sgspy.calculate.pca(rast, 1, filename="pca.tif", driver_options={"COMPRESS": "LZW"})
52
+ #
53
+ # Parameters
54
+ # --------------------
55
+ # rast : SpatialRaster @n
56
+ # raster data structure containing input raster bands @n @n
57
+ # num_comp : int @n
58
+ # the number of components @n @n
59
+ # filename : str @n
60
+ # output filename or '' if there should not be an output file @n @n
61
+ # display_info : bool @n
62
+ # whether to display principal component eigenvalues/eigenvectors @n @n
63
+ # driver_options : dict @n
64
+ # the creation options as defined by GDAL which will be passed when creating output files @n @n
65
+ #
66
+ # Returns
67
+ # --------------------
68
+ # a SpatialRaster object containing principal component bands
69
def pca(
    rast: SpatialRaster,
    num_comp: int,
    filename: str = '',
    display_info: bool = False,
    driver_options: dict = None
    ):
    # Validate the arguments, decide whether the raster has to be processed in
    # blocks, run pca_cpp and wrap its output in a new SpatialRaster.
    #
    # Raises TypeError for an argument of the wrong type, RuntimeError when
    # 'rast' has already been closed, and ValueError when 'num_comp' is not in
    # the range 1..len(rast.bands).
    import shutil

    # 'type(x) is T' rejects subclasses, so e.g. a bool is not accepted as an int
    if type(rast) is not SpatialRaster:
        raise TypeError("'rast' parameter must be of type sgspy.SpatialRaster.")

    if type(num_comp) is not int:
        raise TypeError("'num_comp' parameter must be of type int.")

    if type(filename) is not str:
        raise TypeError("'filename' parameter must be of type str.")

    if type(display_info) is not bool:
        raise TypeError("'display_info' parameter must be of type bool.")

    if driver_options is not None and type(driver_options) is not dict:
        raise TypeError("'driver_options' parameter, if given, must be of type dict.")

    if rast.closed:
        raise RuntimeError("the C++ object which the raster object wraps has been cleaned up and closed.")

    #ensure number of components is acceptable
    if num_comp <= 0 or num_comp > len(rast.bands):
        msg = f"the number of components must be greater than zero and less than or equal to the total number of raster bands ({len(rast.bands)})."
        raise ValueError(msg)

    #ensure driver options keys are string, and convert driver options vals to string
    driver_options_str = {}
    if driver_options:
        for (key, val) in driver_options.items():
            if type(key) is not str:
                raise TypeError("the key for all key/value pairs in the driver_options dict must be a string.")
            driver_options_str[key] = str(val)

    #determine whether the raster should be categorized as 'large' and thus be processed in blocks
    large_raster = False
    raster_size_bytes = 0
    height = rast.height
    width = rast.width
    for band_index in range(len(rast.bands)):
        pixel_size = rast.cpp_raster.get_raster_band_type_size(band_index)
        band_size = height * width * pixel_size
        raster_size_bytes += band_size
        #a single band of a gigabyte or more is enough, no need to look further
        if band_size >= GIGABYTE:
            large_raster = True
            break

    #many smaller bands which add up to more than four gigabytes also count as large
    large_raster = large_raster or (raster_size_bytes > GIGABYTE * 4)

    temp_dir = tempfile.mkdtemp()

    try:
        [pcomp, eigenvectors, eigenvalues] = pca_cpp(
            rast.cpp_raster,
            num_comp,
            large_raster,
            temp_dir,
            filename,
            driver_options_str
        )
    except Exception:
        #no raster object has been handed the temporary directory yet, so
        #remove it here rather than leaving it behind, then re-raise
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    if display_info:
        print('eigenvectors:')
        print(eigenvectors)
        print()
        print('eigenvalues:')
        print(eigenvalues)
        print()

    #record the temporary directory and output location on the returned raster
    pcomp_rast = SpatialRaster(pcomp)
    pcomp_rast.have_temp_dir = True
    pcomp_rast.temp_dir = temp_dir
    pcomp_rast.temp_dataset = filename == "" and large_raster
    pcomp_rast.filename = filename

    return pcomp_rast
@@ -0,0 +1,2 @@
1
+ from . import representation
2
+ from .representation import representation
@@ -0,0 +1,3 @@
1
def representation():
    # Placeholder for a calculation that has not been written yet: every call
    # raises NotImplementedError, with a message naming the function instead
    # of printing this file's path to stdout.
    raise NotImplementedError("sgspy.calculate.representation is not implemented yet.")
@@ -0,0 +1,30 @@
1
+ ##
2
+ # @defgroup user_sample sample
3
+ # @ingroup user
4
+ #
5
+ # Documentation for the sampling functions.
6
+
7
+ from . import (
8
+ ahels,
9
+ clhs,
10
+ nc,
11
+ srs,
12
+ strat,
13
+ systematic,
14
+ )
15
+
16
+ from .ahels import ahels
17
+ from .clhs import clhs
18
+ from .nc import nc
19
+ from .srs import srs
20
+ from .strat import strat
21
+ from .systematic import systematic
22
+
23
+ __all__ = [
24
+ "ahels",
25
+ "clhs",
26
+ "nc",
27
+ "srs",
28
+ "strat",
29
+ "systematic",
30
+ ]
@@ -0,0 +1,2 @@
1
+ from . import ahels
2
+ from .ahels import ahels
@@ -0,0 +1,3 @@
1
def ahels():
    # Placeholder for a sampling method that has not been written yet: every
    # call raises NotImplementedError, with a message naming the function
    # instead of printing this file's path to stdout.
    raise NotImplementedError("sgspy.sample.ahels is not implemented yet.")
@@ -0,0 +1,2 @@
1
+ from . import clhs
2
+ from .clhs import clhs
@@ -0,0 +1,198 @@
1
+ # ******************************************************************************
2
+ #
3
+ # Project: sgs
4
+ # Purpose: conditioned Latin hypercube sampling (clhs)
5
+ # Author: Joseph Meyer
6
+ # Date: June, 2025
7
+ #
8
+ # ******************************************************************************
9
+
10
+ ##
11
+ # @defgroup user_clhs clhs
12
+ # @ingroup user_sample
13
+
14
+ import os
15
+ import sys
16
+ import tempfile
17
+ from typing import Optional
18
+
19
+ import numpy as np
20
+ import matplotlib.pyplot as plt
21
+
22
+ from sgspy.utils import (
23
+ SpatialRaster,
24
+ SpatialVector,
25
+ plot,
26
+ )
27
+
28
+ sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
29
+ from _sgs import clhs_cpp
30
+
31
+ ##
32
+ # @ingroup user_clhs
33
+ # This function conducts Conditioned Latin Hypercube Sampling, see the following article for an
34
+ # in depth description of the method itself:
35
+ #
36
+ # Minasny, B. and McBratney, A.B. 2006. A conditioned Latin hypercube method
37
+ # for sampling in the presence of ancillary information. Computers and Geosciences, 32:1378-1388.
38
+ #
39
+ # The number of output samples is decided by the 'num_samples' parameter. The 'iterations' parameter
40
+ # indicates the number of iterations the simulated annealing portion of the clhs algorithm will undertake
41
+ # in the case where a perfect latin hypercube is not found. A higher number of iterations may result in
42
+ # a more representative sample, although the standard value recommended by Minasny and McBratney is 10000.
43
+ #
44
+ # The access parameter may be given to restrict the areas where sampling may occur. The algorithm will still
45
+ # attempt to find a latin hypercube representative across the entire feature space, not just the accessible
46
+ # pixels. The access vector may contain geometries of type LineString or MultiLineString. buff_outer specifies
47
+ # the buffer distance around the geometry which is allowed to be included in the sampling. buff_inner specifies
48
+ # the buffer distance around the geometry which is not allowed to be included in the sampling. buff_outer must
49
+ # be larger than buff_inner. For a multi layer vector, layer_name must be specified.
50
+ #
51
+ # The output is an object of type sgspy.SpatialVector which contains the chosen sample points.
52
+ #
53
+ # Examples
54
+ # --------------------
55
+ # rast = sgspy.SpatialRaster("raster.tif") @n
56
+ # samples = sgspy.sample.clhs(rast, num_samples=250)
57
+ #
58
+ # rast = sgspy.SpatialRaster("raster.tif") @n
59
+ # samples = sgspy.sample.clhs(rast, num_samples=250, plot=True, filename="clhs_samples.shp")
60
+ #
61
+ # rast = sgspy.SpatialRaster("raster.tif") @n
62
+ # access = sgspy.SpatialVector("access_network.shp") @n
63
+ # samples = sgspy.sample.clhs(rast, num_samples=200, access=access, buff_outer=300)
64
+ #
65
+ # rast = sgspy.SpatialRaster("raster.tif") @n
66
+ # access = sgspy.SpatialVector("access_network.shp") @n
67
+ # samples = sgspy.sample.clhs(rast, num_samples=200, access=access, buff_inner=50, buff_outer=300)
68
+ #
69
+ # Parameters
70
+ # --------------------
71
+ # rast : SpatialRaster @n
72
+ # raster data structure containing input raster bands @n @n
73
+ # num_samples : int @n
74
+ # the target number of samples @n @n
75
+ # iterations : int @n
76
+ # the number of iterations in the clhs algorithm @n @n
77
+ # access : SpatialVector @n
78
+ # a vector specifying an access network @n @n
79
+ # layer_name : str @n
80
+ # the layer within the access network which will be used for sampling @n @n
81
+ # buff_inner : int | float @n
82
+ # buffer boundary specifying distance from access geometries which CANNOT be sampled @n @n
83
+ # buff_outer : int | float @n
84
+ # buffer boundary specifying distance from access geometries which CAN be sampled @n @n
85
+ # plot : bool @n
86
+ # whether to plot the output samples or not @n @n
87
+ # filename : str @n
88
+ # the filename to write to, or '' if file should not be written @n @n
89
+ #
90
+ # Returns
91
+ # --------------------
92
+ # a SpatialVector object containing point geometries of sample locations
93
def clhs(
    rast: SpatialRaster,
    num_samples: int,
    iterations: int = 10000,
    access: Optional[SpatialVector] = None,
    layer_name: Optional[str] = None,
    buff_inner: Optional[int | float] = None,
    buff_outer: Optional[int | float] = None,
    plot: bool = False,
    filename: str = ''):
    # Check the arguments, resolve the access-network settings, hand the work
    # to clhs_cpp, optionally draw the result, and return the sample points
    # wrapped in a SpatialVector.
    #
    # Raises TypeError for an argument of the wrong type, RuntimeError when
    # 'rast' has already been closed, and ValueError for an out-of-range
    # sample count, layer name or buffer distance.

    # Each row pairs "the argument has an accepted type" with the TypeError
    # text used otherwise. Rows are examined top to bottom and the first
    # failing one is reported.
    type_checks = (
        (type(rast) is SpatialRaster,
            "'rast' parameter must be of type sgspy.SpatialRaster."),
        (type(num_samples) is int,
            "'num_samples' parameter must be of type int."),
        (type(iterations) is int,
            "'iterations' parameter must be of type int."),
        (access is None or type(access) is SpatialVector,
            "'access' parameter, if given, must be of type sgspy.SpatialVector."),
        (layer_name is None or type(layer_name) is str,
            "'layer_name' parameter, if given, must be of type str."),
        (buff_inner is None or type(buff_inner) in (int, float),
            "'buff_inner' parameter, if given, must be of type int or float."),
        (buff_outer is None or type(buff_outer) in (int, float),
            "'buff_outer' parameter, if given, must be of type int or float."),
        (type(plot) is bool,
            "'plot' parameter must be of type bool."),
        (type(filename) is str,
            "'filename' parameter must be of type str."),
    )
    for accepted, message in type_checks:
        if not accepted:
            raise TypeError(message)

    if rast.closed:
        raise RuntimeError("the C++ object which the raster object wraps has been cleaned up and closed.")

    if num_samples < 1:
        raise ValueError("num_samples must be greater than 0")

    if not access:
        # no access network: these are the placeholder values passed to clhs_cpp
        access_vector = None
        layer = ""
        inner = -1
        outer = -1
    else:
        # a layer name may only be omitted when the vector has a single layer
        layer = layer_name
        if layer is None:
            if len(access.layers) > 1:
                raise ValueError("if there are multiple layers in the access vector, layer_name parameter must be passed.")
            layer = access.layers[0]

        if layer not in access.layers:
            raise ValueError("layer specified by 'layer_name' does not exist in the access vector")

        # a missing or negative inner buffer is treated as zero
        inner = buff_inner
        if inner is None or inner < 0:
            inner = 0

        outer = buff_outer
        if outer is None or outer < 0:
            raise ValueError("if an access vector is given, buff_outer must be a float greater than 0.")

        if inner >= outer:
            raise ValueError("buff_outer must be greater than buff_inner")

        access_vector = access.cpp_vector

    # reuse the temporary directory already set on the raster, creating and
    # recording a new one only when none has been set
    scratch_dir = rast.cpp_raster.get_temp_dir()
    if scratch_dir == "":
        scratch_dir = tempfile.mkdtemp()
        rast.cpp_raster.set_temp_dir(scratch_dir)

    sample_coordinates, cpp_vector = clhs_cpp(
        rast.cpp_raster,
        num_samples,
        iterations,
        access_vector,
        layer,
        inner,
        outer,
        plot,
        scratch_dir,
        filename
    )

    # draw the samples over the first raster band when asked to; a plotting
    # failure is reported on stdout and does not prevent the samples from
    # being returned
    if plot:
        try:
            fig, ax = plt.subplots()
            rast.plot(ax, band=rast.bands[0])
            title = "samples on " + rast.bands[0]

            if access:
                access.plot('LineString', ax)
                title += " with access"

            ax.plot(sample_coordinates[0], sample_coordinates[1], '.r')
            ax.set_title(label=title)
            plt.show()

        except Exception as e:
            print("unable to plot output: " + str(e))

    return SpatialVector(cpp_vector)
@@ -0,0 +1,2 @@
1
+ from . import nc
2
+ from .nc import nc
sgspy/sample/nc/nc.py ADDED
@@ -0,0 +1,3 @@
1
def nc():
    # Placeholder for a sampling method that has not been written yet: every
    # call raises NotImplementedError, with a message naming the function
    # instead of printing this file's path to stdout.
    raise NotImplementedError("sgspy.sample.nc is not implemented yet.")
@@ -0,0 +1,2 @@
1
+ from . import srs
2
+ from .srs import srs