openeo-gfmap 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openeo_gfmap/features/feature_extractor.py +9 -0
- openeo_gfmap/fetching/__init__.py +16 -4
- openeo_gfmap/fetching/commons.py +1 -0
- openeo_gfmap/fetching/generic.py +81 -73
- openeo_gfmap/fetching/s1.py +1 -3
- openeo_gfmap/fetching/s2.py +1 -0
- openeo_gfmap/inference/model_inference.py +5 -2
- openeo_gfmap/manager/job_manager.py +271 -84
- openeo_gfmap/manager/job_splitters.py +169 -21
- openeo_gfmap/preprocessing/sar.py +12 -33
- openeo_gfmap/stac/constants.py +1 -1
- openeo_gfmap/utils/__init__.py +16 -0
- openeo_gfmap/utils/catalogue.py +172 -35
- openeo_gfmap/utils/split_stac.py +125 -0
- {openeo_gfmap-0.1.0.dist-info → openeo_gfmap-0.3.0.dist-info}/METADATA +5 -4
- {openeo_gfmap-0.1.0.dist-info → openeo_gfmap-0.3.0.dist-info}/RECORD +18 -18
- {openeo_gfmap-0.1.0.dist-info → openeo_gfmap-0.3.0.dist-info}/WHEEL +1 -1
- openeo_gfmap/fetching/meteo.py +0 -126
- {openeo_gfmap-0.1.0.dist-info → openeo_gfmap-0.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -8,25 +8,69 @@ from typing import List
|
|
8
8
|
import geopandas as gpd
|
9
9
|
import h3
|
10
10
|
import requests
|
11
|
+
import s2sphere
|
11
12
|
|
12
13
|
from openeo_gfmap.manager import _log
|
13
14
|
|
14
15
|
|
15
|
-
def load_s2_grid() -> gpd.GeoDataFrame:
|
16
|
+
def load_s2_grid(web_mercator: bool = False) -> gpd.GeoDataFrame:
    """Returns a geo data frame from the S2 grid.

    The grid is cached as a geoparquet file under ``~/.openeo-gfmap``. If the
    cached file does not exist yet, it is downloaded from the artifactory
    before being read.

    :param web_mercator: When True, use the ``s2grid_bounds_3857_v2`` file
        instead of the default ``s2grid_bounds_4326_v2`` file.
    :return: GeoDataFrame read from the cached geoparquet file.
    :raises ValueError: If the download does not answer with HTTP status 200.
    """
    # Builds the path where the geodataframe should be
    if web_mercator:
        file_name = "s2grid_bounds_3857_v2.geoparquet"
    else:
        file_name = "s2grid_bounds_4326_v2.geoparquet"
    gdf_path = Path.home() / ".openeo-gfmap" / file_name
    url = (
        "https://artifactory.vgt.vito.be/artifactory/auxdata-public/gfmap/"
        f"{file_name}"
    )

    if not gdf_path.exists():
        _log.info("S2 grid not found, downloading it from artifactory.")
        # Downloads the file from the artifactory URL
        gdf_path.parent.mkdir(parents=True, exist_ok=True)
        response = requests.get(
            url,
            timeout=180,  # 3mins
        )
        if response.status_code != 200:
            raise ValueError(
                "Failed to download the S2 grid from the artifactory. "
                f"Status code: {response.status_code}"
            )
        # Write to a temporary sibling file and rename it into place, so an
        # interrupted write never leaves a truncated file at the cache path
        # that later calls would try to read instead of downloading again.
        tmp_path = gdf_path.with_name(gdf_path.name + ".part")
        try:
            with open(tmp_path, "wb") as f:
                f.write(response.content)
            tmp_path.replace(gdf_path)
        finally:
            # Only still present when the write or the rename failed.
            if tmp_path.exists():
                tmp_path.unlink()
    return gpd.read_parquet(gdf_path)
|
42
|
+
|
43
|
+
|
44
|
+
def load_s2_grid_centroids(web_mercator: bool = False) -> gpd.GeoDataFrame:
    """Returns a geo data frame from the S2 grid centroids.

    The centroids are cached as a geoparquet file under ``~/.openeo-gfmap``.
    If the cached file does not exist yet, it is downloaded from the
    artifactory before being read.

    :param web_mercator: When True, use the ``s2grid_bounds_3857_centroids``
        file instead of the default ``s2grid_bounds_4326_centroids`` file.
    :return: GeoDataFrame read from the cached geoparquet file.
    :raises ValueError: If the download does not answer with HTTP status 200.
    """
    # Builds the path where the geodataframe should be
    if web_mercator:
        file_name = "s2grid_bounds_3857_centroids.geoparquet"
    else:
        file_name = "s2grid_bounds_4326_centroids.geoparquet"
    gdf_path = Path.home() / ".openeo-gfmap" / file_name
    url = (
        "https://artifactory.vgt.vito.be/artifactory/auxdata-public/gfmap/"
        f"{file_name}"
    )

    if not gdf_path.exists():
        _log.info("S2 grid centroids not found, downloading it from artifactory.")
        # Downloads the file from the artifactory URL
        gdf_path.parent.mkdir(parents=True, exist_ok=True)
        response = requests.get(
            url,
            timeout=180,  # 3mins
        )
        if response.status_code != 200:
            raise ValueError(
                "Failed to download the S2 grid centroids from the artifactory. "
                f"Status code: {response.status_code}"
            )
        # Write to a temporary sibling file and rename it into place, so an
        # interrupted write never leaves a truncated file at the cache path
        # that later calls would try to read instead of downloading again.
        tmp_path = gdf_path.with_name(gdf_path.name + ".part")
        try:
            with open(tmp_path, "wb") as f:
                f.write(response.content)
            tmp_path.replace(gdf_path)
        finally:
            # Only still present when the write or the rename failed.
            if tmp_path.exists():
                tmp_path.unlink()
    return gpd.read_parquet(gdf_path)
|
30
74
|
|
31
75
|
|
32
76
|
def _resplit_group(
|
@@ -38,7 +82,7 @@ def _resplit_group(
|
|
38
82
|
|
39
83
|
|
40
84
|
def split_job_s2grid(
|
41
|
-
polygons: gpd.GeoDataFrame, max_points: int = 500
|
85
|
+
polygons: gpd.GeoDataFrame, max_points: int = 500, web_mercator: bool = False
|
42
86
|
) -> List[gpd.GeoDataFrame]:
|
43
87
|
"""Split a job into multiple jobs from the position of the polygons/points. The centroid of
|
44
88
|
the geometries to extract are used to select tile in the Sentinel-2 tile grid.
|
@@ -60,17 +104,24 @@ def split_job_s2grid(
|
|
60
104
|
if polygons.crs is None:
|
61
105
|
raise ValueError("The GeoDataFrame must contain a CRS")
|
62
106
|
|
63
|
-
|
64
|
-
if polygons.geometry.geom_type[0] != "Point":
|
65
|
-
polygons["geometry"] = polygons.geometry.centroid
|
107
|
+
epsg = 3857 if web_mercator else 4326
|
66
108
|
|
67
|
-
|
68
|
-
s2_grid = load_s2_grid()
|
69
|
-
s2_grid["geometry"] = s2_grid.geometry.centroid
|
109
|
+
original_crs = polygons.crs
|
70
110
|
|
71
|
-
polygons =
|
72
|
-
|
73
|
-
|
111
|
+
polygons = polygons.to_crs(epsg=epsg)
|
112
|
+
|
113
|
+
polygons["centroid"] = polygons.geometry.centroid
|
114
|
+
|
115
|
+
# Dataset containing all the S2 tile centroids, find the nearest S2 tile for each point
|
116
|
+
s2_grid = load_s2_grid_centroids(web_mercator)
|
117
|
+
|
118
|
+
s2_grid = s2_grid[s2_grid.cdse_valid]
|
119
|
+
|
120
|
+
polygons = gpd.sjoin_nearest(
|
121
|
+
polygons.set_geometry("centroid"), s2_grid[["tile", "geometry"]]
|
122
|
+
).drop(columns=["index_right", "centroid"])
|
123
|
+
|
124
|
+
polygons = polygons.set_geometry("geometry").to_crs(original_crs)
|
74
125
|
|
75
126
|
split_datasets = []
|
76
127
|
for _, sub_gdf in polygons.groupby("tile"):
|
@@ -86,12 +137,15 @@ def append_h3_index(
|
|
86
137
|
polygons: gpd.GeoDataFrame, grid_resolution: int = 3
|
87
138
|
) -> gpd.GeoDataFrame:
|
88
139
|
"""Append the H3 index to the polygons."""
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
140
|
+
|
141
|
+
# Project to Web mercator to calculate centroids
|
142
|
+
polygons = polygons.to_crs(epsg=3857)
|
143
|
+
geom_col = polygons.geometry.centroid
|
144
|
+
# Project to lat lon to calculate the h3 index
|
145
|
+
geom_col = geom_col.to_crs(epsg=4326)
|
146
|
+
|
93
147
|
polygons["h3index"] = geom_col.apply(
|
94
|
-
lambda pt: h3.
|
148
|
+
lambda pt: h3.latlng_to_cell(pt.y, pt.x, grid_resolution)
|
95
149
|
)
|
96
150
|
return polygons
|
97
151
|
|
@@ -127,12 +181,13 @@ def split_job_hex(
|
|
127
181
|
if polygons.crs is None:
|
128
182
|
raise ValueError("The GeoDataFrame must contain a CRS")
|
129
183
|
|
130
|
-
|
131
|
-
polygons = polygons.to_crs(epsg=4326)
|
184
|
+
original_crs = polygons.crs
|
132
185
|
|
133
186
|
# Split the polygons into multiple jobs
|
134
187
|
polygons = append_h3_index(polygons, grid_resolution)
|
135
188
|
|
189
|
+
polygons = polygons.to_crs(original_crs)
|
190
|
+
|
136
191
|
split_datasets = []
|
137
192
|
for _, sub_gdf in polygons.groupby("h3index"):
|
138
193
|
if len(sub_gdf) > max_points:
|
@@ -142,3 +197,96 @@ def split_job_hex(
|
|
142
197
|
split_datasets.append(sub_gdf.reset_index(drop=True))
|
143
198
|
|
144
199
|
return split_datasets
|
200
|
+
|
201
|
+
|
202
|
+
def split_job_s2sphere(
    gdf: gpd.GeoDataFrame, max_points=500, start_level=8
) -> List[gpd.GeoDataFrame]:
    """
    EXPERIMENTAL
    Split a GeoDataFrame into multiple groups based on the S2geometry cell ID of each geometry.

    S2geometry is a library that provides a way to index and query spatial data. This function
    splits the GeoDataFrame into groups based on the S2 cell ID of each geometry, based on its
    centroid.

    If a cell contains more points than max_points, it will be recursively split into
    smaller cells until each cell contains at most max_points points. Splitting stops at the
    finest S2 level (``s2sphere.CellId.MAX_LEVEL``): a group that still exceeds max_points
    there (e.g. many geometries sharing one centroid) is returned as is and a warning is logged.

    Every returned group has the columns ``s2sphere_cell_id`` and ``s2sphere_cell_level``
    added, and is expressed in the CRS of the input GeoDataFrame.

    More information on S2geometry can be found at https://s2geometry.io/
    An overview of the S2 cell hierarchy can be found at
    https://s2geometry.io/resources/s2cell_statistics.html

    :param gdf: GeoDataFrame containing points to split
    :param max_points: Maximum number of points per group
    :param start_level: Starting S2 cell level
    :return: List of GeoDataFrames containing the split groups
    :raises ValueError: If the GeoDataFrame has no 'geometry' column or no CRS, or if
        start_level is outside the range of valid S2 cell levels.
    """

    if "geometry" not in gdf.columns:
        raise ValueError("The GeoDataFrame must contain a 'geometry' column.")

    if gdf.crs is None:
        raise ValueError("The GeoDataFrame must contain a CRS")

    max_level = s2sphere.CellId.MAX_LEVEL
    if not 0 <= start_level <= max_level:
        raise ValueError(
            f"start_level must be between 0 and {max_level}, got {start_level}"
        )

    # Store the original CRS of the GeoDataFrame and reproject to EPSG:3857
    original_crs = gdf.crs
    gdf = gdf.to_crs(epsg=3857)

    # Add a centroid column, computed in EPSG:3857
    gdf["centroid"] = gdf.geometry.centroid

    # Reproject the active geometry column back to the original CRS
    gdf = gdf.to_crs(original_crs)

    # Make the centroid column the active geometry and reproject it to EPSG:4326,
    # so that its x/y can be read as lon/lat by _get_s2cell_id
    gdf = gdf.set_geometry("centroid")
    gdf = gdf.to_crs(epsg=4326)

    # Group the rows by the S2 cell ID of their centroid at the starting level
    cell_dict = {}
    for _, row in gdf.iterrows():
        cell_id = _get_s2cell_id(row.centroid, start_level)
        cell_dict.setdefault(cell_id, []).append(row)

    result_groups = []

    # Function to recursively split cells if they contain more points than max_points
    def _split_s2cell(cell_id, points, current_level=start_level):
        if not points:
            # Empty child cells produce no group
            return

        too_many = len(points) > max_points
        if too_many and current_level < max_level:
            # Distribute the points over the children of this cell and recurse
            children = s2sphere.CellId(cell_id).children()
            child_cells = {child.id(): [] for child in children}

            for point in points:
                child_cell_id = _get_s2cell_id(point.centroid, current_level + 1)
                child_cells[child_cell_id].append(point)

            for child_cell_id, child_points in child_cells.items():
                _split_s2cell(child_cell_id, child_points, current_level + 1)
            return

        if too_many:
            # The finest level cannot be subdivided any further; without this guard the
            # recursion would never terminate for centroids falling in the same cell.
            _log.warning(
                "S2 cell %s at level %s holds %s geometries, more than max_points=%s, "
                "and cannot be split further.",
                cell_id,
                current_level,
                len(points),
                max_points,
            )

        group = gpd.GeoDataFrame(
            points, crs=original_crs, geometry="geometry"
        ).drop(columns=["centroid"])
        group["s2sphere_cell_id"] = cell_id
        group["s2sphere_cell_level"] = current_level
        result_groups.append(group)

    # Split cells that contain more points than max_points
    for cell_id, points in cell_dict.items():
        _split_s2cell(cell_id, points)

    return result_groups
|
285
|
+
|
286
|
+
|
287
|
+
def _get_s2cell_id(point, level):
    """Return the integer ID of the S2 cell at ``level`` for ``point``.

    ``point.y`` is passed as the latitude and ``point.x`` as the longitude,
    both in degrees.

    :param point: Object exposing ``x`` and ``y`` attributes
    :param level: S2 cell level of the returned cell
    :return: Value of ``CellId.id()`` for the cell at ``level``
    """
    position = s2sphere.LatLng.from_degrees(point.y, point.x)
    cell = s2sphere.CellId.from_lat_lng(position)
    return cell.parent(level).id()
|
@@ -1,9 +1,9 @@
|
|
1
1
|
"""Routines to pre-process sar signals."""
|
2
2
|
|
3
3
|
import openeo
|
4
|
-
from openeo.processes import array_create,
|
4
|
+
from openeo.processes import array_create, power
|
5
5
|
|
6
|
-
from openeo_gfmap import
|
6
|
+
from openeo_gfmap import BackendContext
|
7
7
|
|
8
8
|
|
9
9
|
def compress_backscatter_uint16(
|
@@ -27,38 +27,17 @@ def compress_backscatter_uint16(
|
|
27
27
|
openeo.DataCube
|
28
28
|
The datacube with the backscatter values compressed to uint16.
|
29
29
|
"""
|
30
|
-
backend = backend_context.backend
|
31
30
|
|
32
|
-
#
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
[
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
power(base=10, p=(10.0 * x[0].log(base=10) + 83.0) / 20.0),
|
43
|
-
),
|
44
|
-
if_(
|
45
|
-
is_nodata(x[1]),
|
46
|
-
1,
|
47
|
-
power(base=10, p=(10.0 * x[1].log(base=10) + 83.0) / 20.0),
|
48
|
-
),
|
49
|
-
]
|
50
|
-
),
|
51
|
-
)
|
52
|
-
else:
|
53
|
-
cube = cube.apply_dimension(
|
54
|
-
dimension="bands",
|
55
|
-
process=lambda x: array_create(
|
56
|
-
[
|
57
|
-
power(base=10, p=(10.0 * x[0].log(base=10) + 83.0) / 20.0),
|
58
|
-
power(base=10, p=(10.0 * x[1].log(base=10) + 83.0) / 20.0),
|
59
|
-
]
|
60
|
-
),
|
61
|
-
)
|
31
|
+
# Apply rescaling of power values in a logarithmic way
|
32
|
+
cube = cube.apply_dimension(
|
33
|
+
dimension="bands",
|
34
|
+
process=lambda x: array_create(
|
35
|
+
[
|
36
|
+
power(base=10, p=(10.0 * x[0].log(base=10) + 83.0) / 20.0),
|
37
|
+
power(base=10, p=(10.0 * x[1].log(base=10) + 83.0) / 20.0),
|
38
|
+
]
|
39
|
+
),
|
40
|
+
)
|
62
41
|
|
63
42
|
# Change the data type to uint16 for optimization purposes
|
64
43
|
return cube.linear_scale_range(1, 65534, 1, 65534)
|
openeo_gfmap/stac/constants.py
CHANGED
openeo_gfmap/utils/__init__.py
CHANGED
@@ -1,8 +1,11 @@
|
|
1
1
|
"""This sub-module contains utilitary function and tools for OpenEO-GFMap"""
|
2
2
|
|
3
|
+
import logging
|
4
|
+
|
3
5
|
from openeo_gfmap.utils.build_df import load_json
|
4
6
|
from openeo_gfmap.utils.intervals import quintad_intervals
|
5
7
|
from openeo_gfmap.utils.netcdf import update_nc_attributes
|
8
|
+
from openeo_gfmap.utils.split_stac import split_collection_by_epsg
|
6
9
|
from openeo_gfmap.utils.tile_processing import (
|
7
10
|
array_bounds,
|
8
11
|
arrays_cosine_similarity,
|
@@ -11,6 +14,18 @@ from openeo_gfmap.utils.tile_processing import (
|
|
11
14
|
select_sar_bands,
|
12
15
|
)
|
13
16
|
|
17
|
+
_log = logging.getLogger(__name__)
|
18
|
+
_log.setLevel(logging.INFO)
|
19
|
+
|
20
|
+
ch = logging.StreamHandler()
|
21
|
+
ch.setLevel(logging.INFO)
|
22
|
+
|
23
|
+
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
24
|
+
ch.setFormatter(formatter)
|
25
|
+
|
26
|
+
_log.addHandler(ch)
|
27
|
+
|
28
|
+
|
14
29
|
__all__ = [
|
15
30
|
"load_json",
|
16
31
|
"normalize_array",
|
@@ -19,5 +34,6 @@ __all__ = [
|
|
19
34
|
"select_sar_bands",
|
20
35
|
"arrays_cosine_similarity",
|
21
36
|
"quintad_intervals",
|
37
|
+
"split_collection_by_epsg",
|
22
38
|
"update_nc_attributes",
|
23
39
|
]
|