openeo-gfmap 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openeo_gfmap/__init__.py +23 -0
- openeo_gfmap/backend.py +122 -0
- openeo_gfmap/features/__init__.py +17 -0
- openeo_gfmap/features/feature_extractor.py +389 -0
- openeo_gfmap/fetching/__init__.py +21 -0
- openeo_gfmap/fetching/commons.py +213 -0
- openeo_gfmap/fetching/fetching.py +98 -0
- openeo_gfmap/fetching/generic.py +165 -0
- openeo_gfmap/fetching/meteo.py +126 -0
- openeo_gfmap/fetching/s1.py +195 -0
- openeo_gfmap/fetching/s2.py +236 -0
- openeo_gfmap/inference/__init__.py +3 -0
- openeo_gfmap/inference/model_inference.py +347 -0
- openeo_gfmap/manager/__init__.py +31 -0
- openeo_gfmap/manager/job_manager.py +469 -0
- openeo_gfmap/manager/job_splitters.py +144 -0
- openeo_gfmap/metadata.py +24 -0
- openeo_gfmap/preprocessing/__init__.py +22 -0
- openeo_gfmap/preprocessing/cloudmasking.py +268 -0
- openeo_gfmap/preprocessing/compositing.py +74 -0
- openeo_gfmap/preprocessing/interpolation.py +12 -0
- openeo_gfmap/preprocessing/sar.py +64 -0
- openeo_gfmap/preprocessing/scaling.py +65 -0
- openeo_gfmap/preprocessing/udf_cldmask.py +36 -0
- openeo_gfmap/preprocessing/udf_rank.py +37 -0
- openeo_gfmap/preprocessing/udf_score.py +103 -0
- openeo_gfmap/spatial.py +53 -0
- openeo_gfmap/stac/__init__.py +2 -0
- openeo_gfmap/stac/constants.py +51 -0
- openeo_gfmap/temporal.py +22 -0
- openeo_gfmap/utils/__init__.py +23 -0
- openeo_gfmap/utils/build_df.py +48 -0
- openeo_gfmap/utils/catalogue.py +248 -0
- openeo_gfmap/utils/intervals.py +64 -0
- openeo_gfmap/utils/netcdf.py +25 -0
- openeo_gfmap/utils/tile_processing.py +64 -0
- openeo_gfmap-0.1.0.dist-info/METADATA +57 -0
- openeo_gfmap-0.1.0.dist-info/RECORD +40 -0
- openeo_gfmap-0.1.0.dist-info/WHEEL +4 -0
- openeo_gfmap-0.1.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,268 @@
|
|
1
|
+
"""Different cloud masking strategies for an OpenEO datacubes."""
|
2
|
+
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Union
|
5
|
+
|
6
|
+
import openeo
|
7
|
+
from openeo.processes import if_, is_nan
|
8
|
+
|
9
|
+
SCL_HARMONIZED_NAME: str = "S2-L2A-SCL"
|
10
|
+
BAPSCORE_HARMONIZED_NAME: str = "S2-L2A-BAPSCORE"
|
11
|
+
|
12
|
+
|
13
|
+
def mask_scl_dilation(cube: openeo.DataCube, **params: dict) -> openeo.DataCube:
    """Mask the Sentinel-2 bands of a datacube with a dilated SCL mask.

    The cube is split on band-name prefix: bands whose name starts with
    ``"S2"`` are sent through the backend ``mask_scl_dilation`` process, all
    other bands (e.g. DEM, SAR, METEO) are left untouched and merged back
    afterwards.

    Parameters
    ----------
    cube : openeo.DataCube
        Datacube whose band names must include the harmonized SCL band.
    params : dict
        Optional overrides: ``kernel1_size`` (default 17), ``kernel2_size``
        (default 3) and ``erosion_kernel_size`` (default 3).

    Returns
    -------
    openeo.DataCube
        The masked S2 bands, merged with the non-S2 bands when there are any.
    """
    band_names = cube.metadata.band_names

    # The SCL layer is required to build the mask.
    assert SCL_HARMONIZED_NAME in band_names, (
        f"The SCL band ({SCL_HARMONIZED_NAME}) is not present in the datacube."
    )

    # TODO adapt the dilation size given the mask size in meters
    # TODO check how to get the spatial resolution from the cube metadata
    dilation_arguments = {
        "scl_band_name": SCL_HARMONIZED_NAME,
        "kernel1_size": params.get("kernel1_size", 17),
        "kernel2_size": params.get("kernel2_size", 3),
        "mask1_values": [2, 4, 5, 6, 7],
        "mask2_values": [3, 8, 9, 10, 11],
        "erosion_kernel_size": params.get("erosion_kernel_size", 3),
    }

    # Partition the bands: only the optical (S2) part gets masked.
    s2_names = [name for name in band_names if name.startswith("S2")]
    other_names = [name for name in band_names if not name.startswith("S2")]

    optical_cube = cube.filter_bands(bands=s2_names)
    nonoptical_cube = cube.filter_bands(bands=other_names)

    optical_cube = optical_cube.process(
        "mask_scl_dilation", data=optical_cube, **dilation_arguments
    )

    # Only merge when there is something to merge back.
    if nonoptical_cube.metadata.band_names:
        return optical_cube.merge_cubes(nonoptical_cube)

    return optical_cube
|
56
|
+
|
57
|
+
|
58
|
+
def get_bap_score(cube: openeo.DataCube, **params: dict) -> openeo.DataCube:
    """Compute the Best Available Pixel (BAP) score from the SCL band.

    The score itself is produced by the ``udf_score.py`` UDF located next to
    this module, run on 256x256 px chunks with a 16 px overlap. That UDF
    combines three partial scores as a weighted average:

    * Distance-to-cloud score: a logistic function of the chamfer distance
      (scipy ``distance_transform_cdt``, default metric) to the nearest cloud
      pixel, capped at 50 px. Weight 1.
    * Date score: a Gaussian of the day of the month centred on day 15, so
      mid-month observations are favoured. Weight 0.8.
    * Coverage score: 1 minus the fraction of cloud pixels in the processed
      chunk for that date. Weight 0.5.

    Parameters
    ----------
    cube : openeo.DataCube
        The datacube to compute the BAP score from, only the SCL band is used.
    params : dict
        Additional parameters:

        * ``apply_scl_dilation``: whether to run the ``to_scl_dilation_mask``
          process on the SCL band before scoring (default ``False``).
        * ``kernel1_size``: first dilation kernel size (default 17).
        * ``kernel2_size``: second dilation kernel size (default 3).
        * ``erosion_kernel_size``: erosion kernel size (default 3).

    Returns
    -------
    openeo.DataCube
        A datacube whose single band is labelled 'S2-L2A-BAPSCORE'.
    """

    def _px(dimension, value):
        # Chunk/overlap specification expressed in pixels.
        return {"dimension": dimension, "unit": "px", "value": value}

    score_udf_file = Path(__file__).parent / "udf_score.py"

    # Only the SCL band is needed to derive the score.
    scl_cube = cube.filter_bands([SCL_HARMONIZED_NAME])

    if params.get("apply_scl_dilation", False):
        scl_cube = scl_cube.process(
            "to_scl_dilation_mask",
            data=scl_cube,
            scl_band_name=SCL_HARMONIZED_NAME,
            kernel1_size=params.get("kernel1_size", 17),
            kernel2_size=params.get("kernel2_size", 3),
            mask1_values=[2, 4, 5, 6, 7],
            mask2_values=[3, 8, 9, 10, 11],
            erosion_kernel_size=params.get("erosion_kernel_size", 3),
        )

    # NaN values are turned into 0 so the UDF does not have to deal with them.
    scl_cube = scl_cube.apply(lambda x: if_(is_nan(x), 0, x))

    score_cube = scl_cube.apply_neighborhood(
        process=openeo.UDF.from_file(str(score_udf_file)),
        size=[_px("x", 256), _px("y", 256)],
        overlap=[_px("x", 16), _px("y", 16)],
    )

    # Relabel the (single) band so downstream code can find the score by name.
    return score_cube.rename_labels("bands", [BAPSCORE_HARMONIZED_NAME])
|
138
|
+
|
139
|
+
|
140
|
+
def get_bap_mask(cube: openeo.DataCube, period: Union[str, list], **params: dict):
    """Build a per-pixel selection mask from the BAP score.

    The BAP score is computed with `get_bap_score`; the mask is then derived
    per pixel and per temporal window, which combines cloud masking with a
    form of compositing.

    * When `period` is a string, each pixel's time series is processed per
      window of that period and the mask is true where the score differs
      from the window maximum.
    * When `period` is a list, the ``udf_rank.py`` UDF located next to this
      module is run on 256x256 px chunks with the list passed as the
      ``intervals`` context entry.

    NOTE(review): the string branch flags the non-maximum observations,
    whereas ``udf_rank.py`` appears to flag the maximum ones; confirm that
    both branches produce the polarity expected by `bap_masking`.

    Parameters
    ----------
    cube : openeo.DataCube
        The datacube to be processed; must contain the harmonized SCL band.
    period : Union[str, list]
        A period string, or a list of dates (in YYYY-mm-dd format) to be used
        as the temporal windows in which the best score is selected.
    params : dict
        Additional parameters, forwarded to `get_bap_score`.

    Returns
    -------
    openeo.DataCube
        The mask cube, with its band labelled 'S2-L2A-BAPMASK'.

    Raises
    ------
    ValueError
        If `period` is neither a string nor a list.
    """
    # The score can only be computed when the S2-L2A-SCL band is available.
    assert SCL_HARMONIZED_NAME in cube.metadata.band_names, (
        f"The {SCL_HARMONIZED_NAME} band is not present in the datacube."
    )

    bap_score = get_bap_score(cube, **params)

    if not isinstance(period, (str, list)):
        raise ValueError(
            f"'period' must be a string or a list of dates (in YYYY-mm-dd format), got {period}."
        )

    if isinstance(period, str):

        def flag_non_maximum(score):
            # True for every observation that is NOT the best one in the window.
            max_score = score.max()
            return score.array_apply(lambda x: x != max_score)

        # One pixel at a time, one temporal window at a time.
        rank_mask = bap_score.apply_neighborhood(
            process=flag_non_maximum,
            size=[
                {"dimension": "x", "unit": "px", "value": 1},
                {"dimension": "y", "unit": "px", "value": 1},
                {"dimension": "t", "value": period},
            ],
            overlap=[],
        )
    else:
        # Custom intervals are handled by the ranking UDF.
        rank_udf_file = Path(__file__).parent / "udf_rank.py"
        rank_udf = openeo.UDF.from_file(
            str(rank_udf_file), context={"intervals": period}
        )
        rank_mask = bap_score.apply_neighborhood(
            process=rank_udf,
            size=[
                {"dimension": "x", "unit": "px", "value": 256},
                {"dimension": "y", "unit": "px", "value": 256},
            ],
            overlap=[],
        )

    return rank_mask.rename_labels("bands", ["S2-L2A-BAPMASK"])
|
202
|
+
|
203
|
+
|
204
|
+
def bap_masking(cube: openeo.DataCube, period: Union[str, list], **params: dict):
    """Mask the Sentinel-2 bands of a cube with the BAP mask.

    The mask is obtained from `get_bap_mask` on the bands whose name starts
    with ``"S2"``, resampled spatially to the input cube and applied to those
    same bands. All other bands are passed through unchanged.

    Parameters
    ----------
    cube : openeo.DataCube
        The datacube to be processed.
    period : Union[str, list]
        A period string, or a list of dates (in YYYY-mm-dd format) to be used
        as the temporal windows to compute the BAP mask.
    params : dict
        Additional parameters, forwarded to `get_bap_mask`.

    Returns
    -------
    openeo.DataCube
        The datacube with the BAP mask applied to its S2 bands.
    """
    all_names = cube.metadata.band_names
    s2_names = [name for name in all_names if name.startswith("S2")]
    other_names = [name for name in all_names if not name.startswith("S2")]

    optical_cube = cube.filter_bands(bands=s2_names)
    nonoptical_cube = cube.filter_bands(bands=other_names)

    # The mask is aligned on the input cube's grid before being applied.
    rank_mask = get_bap_mask(optical_cube, period, **params)
    aligned_mask = rank_mask.resample_cube_spatial(cube)
    optical_cube = optical_cube.mask(aligned_mask)

    # Do not merge if bands are empty!
    if nonoptical_cube.metadata.band_names:
        return optical_cube.merge_cubes(nonoptical_cube)

    return optical_cube
|
241
|
+
|
242
|
+
|
243
|
+
def cloudmask_percentage(
    cube: openeo.DataCube, percentage: float = 0.95
) -> openeo.DataCube:
    """Mask whole observations that are almost entirely cloudy.

    A mask is computed from the SCL band by the ``udf_cldmask.py`` UDF located
    next to this module, on chunks of 1024x1024 px: the mask either covers a
    whole observation or is empty. The mask is then applied to every band
    that is not an SCL band, and the SCL band is merged back.

    Parameters
    ----------
    cube : openeo.DataCube
        The datacube to process. It must contain at least one band whose name
        contains ``"SCL"`` (e.g. ``"SCL"`` or ``"S2-L2A-SCL"``).
    percentage : float, optional (default=0.95)
        Fraction of high cloud probability pixels above which an observation
        is masked. It is forwarded to the UDF through the ``percentage``
        context entry (whether it is honoured depends on the UDF
        implementation).

    Returns
    -------
    openeo.DataCube
        The masked non-SCL bands merged with the SCL band(s).

    Raises
    ------
    ValueError
        If the datacube has no SCL band.
    """
    band_names = cube.metadata.band_names

    # Use the same substring rule for both halves of the partition, so that
    # both raw ("SCL") and harmonized ("S2-L2A-SCL") band names are supported.
    scl_bands = [band for band in band_names if "SCL" in band]
    non_scl_bands = [band for band in band_names if "SCL" not in band]

    if not scl_bands:
        raise ValueError("No SCL band is present in the datacube.")

    non_scl_cube = cube.filter_bands(bands=non_scl_bands)
    scl_cube = cube.filter_bands(scl_bands)

    # Resolve the UDF relative to this module rather than the current working
    # directory, as done for the other UDFs of this module.
    udf_path = Path(__file__).parent / "udf_cldmask.py"

    cloud_mask = scl_cube.apply_neighborhood(
        process=openeo.UDF.from_file(
            str(udf_path), context={"percentage": percentage}
        ),
        size=[
            {"dimension": "x", "unit": "px", "value": 1024},
            {"dimension": "y", "unit": "px", "value": 1024},
            {"dimension": "t", "value": 1},
        ],
        overlap=[],
    )

    non_scl_cube = non_scl_cube.mask(cloud_mask.resample_cube_spatial(cube))

    return non_scl_cube.merge_cubes(scl_cube)
|
@@ -0,0 +1,74 @@
|
|
1
|
+
"""Temporal compositing, or temporal aggregation, is a method to increase the
|
2
|
+
quality of data within timesteps by reducing the temporal resolution of a time
|
3
|
+
series of satellite images.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from typing import Union
|
7
|
+
|
8
|
+
import openeo
|
9
|
+
|
10
|
+
|
11
|
+
def median_compositing(
    cube: openeo.DataCube, period: Union[str, list]
) -> openeo.DataCube:
    """Perform median compositing on the given datacube.

    Parameters
    ----------
    cube : openeo.DataCube
        The datacube to aggregate along its ``t`` dimension.
    period : Union[str, list]
        Either a period string, handled by ``aggregate_temporal_period``, or a
        list of temporal intervals, handled by ``aggregate_temporal``.

    Returns
    -------
    openeo.DataCube
        The temporally aggregated datacube.

    Raises
    ------
    ValueError
        If `period` is neither a string nor a list.
    """
    if isinstance(period, str):
        return cube.aggregate_temporal_period(
            period=period, reducer="median", dimension="t"
        )
    if isinstance(period, list):
        return cube.aggregate_temporal(
            intervals=period, reducer="median", dimension="t"
        )
    # Fail loudly instead of silently returning None.
    raise ValueError(
        f"'period' must be a string or a list of temporal intervals, got {period!r}."
    )
|
23
|
+
|
24
|
+
|
25
|
+
def mean_compositing(
    cube: openeo.DataCube, period: Union[str, list]
) -> openeo.DataCube:
    """Perform mean compositing on the given datacube.

    Parameters
    ----------
    cube : openeo.DataCube
        The datacube to aggregate along its ``t`` dimension.
    period : Union[str, list]
        Either a period string, handled by ``aggregate_temporal_period``, or a
        list of temporal intervals, handled by ``aggregate_temporal``.

    Returns
    -------
    openeo.DataCube
        The temporally aggregated datacube.

    Raises
    ------
    ValueError
        If `period` is neither a string nor a list.
    """
    if isinstance(period, str):
        return cube.aggregate_temporal_period(
            period=period, reducer="mean", dimension="t"
        )
    if isinstance(period, list):
        return cube.aggregate_temporal(intervals=period, reducer="mean", dimension="t")
    # Fail loudly instead of silently returning None.
    raise ValueError(
        f"'period' must be a string or a list of temporal intervals, got {period!r}."
    )
|
35
|
+
|
36
|
+
|
37
|
+
def sum_compositing(cube: openeo.DataCube, period: Union[str, list]) -> openeo.DataCube:
    """Perform sum compositing on the given datacube.

    Parameters
    ----------
    cube : openeo.DataCube
        The datacube to aggregate along its ``t`` dimension.
    period : Union[str, list]
        Either a period string, handled by ``aggregate_temporal_period``, or a
        list of temporal intervals, handled by ``aggregate_temporal``.

    Returns
    -------
    openeo.DataCube
        The temporally aggregated datacube.

    Raises
    ------
    ValueError
        If `period` is neither a string nor a list.
    """
    if isinstance(period, str):
        return cube.aggregate_temporal_period(
            period=period, reducer="sum", dimension="t"
        )
    if isinstance(period, list):
        return cube.aggregate_temporal(intervals=period, reducer="sum", dimension="t")
    # Fail loudly instead of silently returning None.
    raise ValueError(
        f"'period' must be a string or a list of temporal intervals, got {period!r}."
    )
|
45
|
+
|
46
|
+
|
47
|
+
def max_ndvi_compositing(cube: openeo.DataCube, period: str) -> openeo.DataCube:
    """Composite the cube by keeping, per pixel, the highest-NDVI observation.

    NDVI is computed from the ``S2-L2A-B08`` (nir) and ``S2-L2A-B04`` (red)
    bands. For every pixel and every window of `period`, observations whose
    NDVI differs from the window maximum are masked; the remaining values are
    then aggregated per period with the ``first`` reducer.

    Parameters
    ----------
    cube : openeo.DataCube
        The datacube to composite.
    period : str
        The compositing period, as a string.

    Returns
    -------
    openeo.DataCube
        The composited datacube.

    Raises
    ------
    ValueError
        If `period` is not a string (custom intervals are not supported).
    """
    if not isinstance(period, str):
        raise ValueError(
            "Custom temporal intervals are not yet supported for max NDVI compositing."
        )

    def flag_non_maximum(ndvi: openeo.DataCube):
        # True for every observation that is NOT the NDVI maximum of the window.
        max_ndvi = ndvi.max()
        return ndvi.array_apply(lambda x: x != max_ndvi)

    ndvi_cube = cube.ndvi(nir="S2-L2A-B08", red="S2-L2A-B04")

    # One pixel at a time, one temporal window at a time.
    window = [
        {"dimension": "x", "unit": "px", "value": 1},
        {"dimension": "y", "unit": "px", "value": 1},
        {"dimension": "t", "value": period},
    ]
    rank_mask = ndvi_cube.apply_neighborhood(flag_non_maximum, size=window, overlap=[])

    masked_cube = cube.mask(mask=rank_mask)
    return masked_cube.aggregate_temporal_period(period, "first")
|
@@ -0,0 +1,12 @@
|
|
1
|
+
"""Utilities to perform interpolation on missing values using the temporal
|
2
|
+
dimension.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import openeo
|
6
|
+
|
7
|
+
|
8
|
+
def linear_interpolation(cube: openeo.DataCube) -> openeo.DataCube:
    """Apply the ``array_interpolate_linear`` process along the ``t`` dimension.

    Parameters
    ----------
    cube : openeo.DataCube
        The datacube to interpolate.

    Returns
    -------
    openeo.DataCube
        The datacube after interpolation along time.
    """
    interpolated = cube.apply_dimension(
        process="array_interpolate_linear", dimension="t"
    )
    return interpolated
|
@@ -0,0 +1,64 @@
|
|
1
|
+
"""Routines to pre-process sar signals."""
|
2
|
+
|
3
|
+
import openeo
|
4
|
+
from openeo.processes import array_create, if_, is_nodata, power
|
5
|
+
|
6
|
+
from openeo_gfmap import Backend, BackendContext
|
7
|
+
|
8
|
+
|
9
|
+
def compress_backscatter_uint16(
    backend_context: BackendContext, cube: openeo.DataCube
) -> openeo.DataCube:
    """Rescale the first two bands of a backscatter cube to the 1-65534 range.

    Each of the two bands is transformed with
    ``10 ** ((10 * log10(value) + 83) / 20)``, i.e. the value is expressed in
    decibels, offset by 83 and mapped back through a power of ten. The result
    then goes through ``linear_scale_range(1, 65534, 1, 65534)``, which is
    intended to give the cube a uint16 representation (memory optimization
    before passing through any UDF).

    On the CDSE, CDSE_STAGING and FED backends, no-data inputs are replaced by
    1 instead of being transformed.

    Parameters
    ----------
    backend_context : BackendContext
        The backend context, used to look up which backend is targeted.
    cube : openeo.DataCube
        The datacube holding the backscatter values to compress.

    Returns
    -------
    openeo.DataCube
        The datacube with the rescaled backscatter values.
    """

    def _rescale(band):
        # Power -> decibels -> shifted/compressed power of ten.
        return power(base=10, p=(10.0 * band.log(base=10) + 83.0) / 20.0)

    def _rescale_guarded(x):
        # Same as `_rescale_plain`, with no-data values forced to 1.
        return array_create(
            [
                if_(is_nodata(x[0]), 1, _rescale(x[0])),
                if_(is_nodata(x[1]), 1, _rescale(x[1])),
            ]
        )

    def _rescale_plain(x):
        return array_create([_rescale(x[0]), _rescale(x[1])])

    # Additional check related to problematic values present in creodias collections.
    # https://github.com/Open-EO/openeo-geopyspark-driver/issues/293
    guarded_backends = (Backend.CDSE, Backend.CDSE_STAGING, Backend.FED)
    if backend_context.backend in guarded_backends:
        band_process = _rescale_guarded
    else:
        band_process = _rescale_plain

    cube = cube.apply_dimension(dimension="bands", process=band_process)

    # Change the data type to uint16 for optimization purposes
    return cube.linear_scale_range(1, 65534, 1, 65534)
|
@@ -0,0 +1,65 @@
|
|
1
|
+
"""Scaling and compressing methods for datacubes."""
|
2
|
+
|
3
|
+
import openeo
|
4
|
+
|
5
|
+
|
6
|
+
def _compress(
    cube: openeo.DataCube,
    min_val: int,
    max_val: int,
    alpha: float,
    beta: float,
):
    """Apply ``cube * alpha + beta`` when needed, then clamp via
    ``linear_scale_range(min_val, max_val, min_val, max_val)``.
    """
    # Skip the affine step for the identity transform, so that no extra node
    # is added to the process graph.
    is_identity = alpha == 1.0 and beta == 0.0
    if not is_identity:
        cube = (cube * alpha) + beta

    scaled = cube.linear_scale_range(min_val, max_val, min_val, max_val)
    return scaled
|
19
|
+
|
20
|
+
|
21
|
+
def compress_uint16(
    cube: openeo.DataCube, alpha: float = 1.0, beta: float = 0.0
) -> openeo.DataCube:
    """Scale the data with `output = (input * alpha) + beta` and restrict it
    to the 0-65534 range, so that it can be held as uint16 instead of float32
    (memory optimization).

    Parameters
    ----------
    cube : openeo.DataCube
        The input datacube to compress, only meteo data should be present.
    alpha : float, optional (default=1.0)
        The scaling factor. Input values are multiplied by this coefficient.
    beta : float, optional (default=0.0)
        The offset. This value is added to the scaled input values.

    Returns
    -------
    cube : openeo.DataCube
        The linearly scaled datacube, range-limited to 0-65534.
    """
    compressed = _compress(cube, min_val=0, max_val=65534, alpha=alpha, beta=beta)
    return compressed
|
42
|
+
|
43
|
+
|
44
|
+
def compress_uint8(
    cube: openeo.DataCube, alpha: float = 1.0, beta: float = 0.0
) -> openeo.DataCube:
    """Scale the data with `output = (input * alpha) + beta` and restrict it
    to the 0-253 range, so that it can be held as uint8 instead of float32
    (memory optimization).

    NOTE(review): the upper bound is 253 here while the uint16 variant uses
    65534 (type maximum minus one); confirm that 253 is intended.

    Parameters
    ----------
    cube : openeo.DataCube
        The input datacube to compress, only meteo data should be present.
    alpha : float, optional (default=1.0)
        The scaling factor. Input values are multiplied by this coefficient.
    beta : float, optional (default=0.0)
        The offset. This value is added to the scaled input values.

    Returns
    -------
    cube : openeo.DataCube
        The linearly scaled datacube, range-limited to 0-253.
    """
    compressed = _compress(cube, min_val=0, max_val=253, alpha=alpha, beta=beta)
    return compressed
|
@@ -0,0 +1,36 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import xarray as xr
|
3
|
+
from openeo.udf import XarrayDataCube
|
4
|
+
|
5
|
+
|
6
|
+
def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
    """
    Computes a cloud mask covering a full observation or nothing depending on the percentage of
    high probability cloud pixels (value 9 in the input). If the fraction of such pixels in an
    observation is higher than the threshold, the mask covers the whole observation, otherwise
    the mask is empty for that observation.

    Parameters
    ----------
    cube : XarrayDataCube
        The input cube, with dimensions ``t``, ``bands``, ``y`` and ``x``.
    context : dict
        Optional settings. ``percentage`` (default 0.95) is the fraction above which an
        observation is fully masked.

    Returns
    -------
    XarrayDataCube
        A uint8 cube with a single ``mask`` band: 1 for masked observations, 0 otherwise.
    """
    array = cube.get_array().transpose("t", "bands", "y", "x")

    # The threshold used to be hard-coded; keep 0.95 as the default so that
    # callers passing an empty (or no) context get the same result as before.
    threshold = (context or {}).get("percentage", 0.95)

    output_array = np.zeros(
        shape=(array.shape[0], 1, array.shape[2], array.shape[3]), dtype=np.uint8
    )

    # Number of pixels in one observation, invariant across timestamps.
    pixel_count = array.shape[2] * array.shape[3]

    for i in range(array.shape[0]):
        high_proba_count = ((array[i] == 9) * 1).sum()
        high_proba_percentage = high_proba_count / pixel_count

        if high_proba_percentage > threshold:
            output_array[i] = 1

    output_array = xr.DataArray(
        output_array,
        dims=["t", "bands", "y", "x"],
        coords={
            "t": array.t,
            "bands": ["mask"],
            "y": array.y,
            "x": array.x,
        },
    )

    return XarrayDataCube(output_array)
|
@@ -0,0 +1,37 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import xarray as xr
|
3
|
+
from openeo.udf import XarrayDataCube
|
4
|
+
|
5
|
+
|
6
|
+
def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
    """Flag, per pixel, the observation(s) holding the best BAP score of each
    interval.

    The ``S2-L2A-BAPSCORE`` band of the input is grouped along ``t`` into
    bins built from the first element of every entry of the ``intervals``
    context value. Within each bin, the output is True where the score
    equals the maximum over time of that bin.

    String periods such as "month", "dekad", etc. are not supported yet.

    NOTE(review): only the interval starts are used as bin edges, so N
    intervals yield N-1 bins; confirm that observations of the last interval
    are handled as intended.
    """
    # The intervals are expected in the context; checked further down.
    intervals = context.get("intervals", None)

    data = cube.get_array().transpose("t", "bands", "y", "x")
    bap_score = data.sel(bands=["S2-L2A-BAPSCORE"])

    if isinstance(intervals, str):
        raise NotImplementedError(
            "Period as string is not implemented yet, please provide a list of interval tuples."
        )
    if not isinstance(intervals, list):
        raise ValueError("Period is not defined in the UDF. Cannot run it.")

    # Convert the YYYY-mm-dd interval starts to datetime64 bin edges.
    bin_edges = [np.datetime64(interval[0]) for interval in intervals]

    grouped_scores = bap_score.groupby_bins("t", bins=bin_edges)
    rank_mask = grouped_scores.map(lambda score: score == score.max(dim="t"))

    return XarrayDataCube(rank_mask)
|
@@ -0,0 +1,103 @@
|
|
1
|
+
import math
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
import xarray as xr
|
5
|
+
from openeo.udf import XarrayDataCube
|
6
|
+
from scipy.ndimage import distance_transform_cdt
|
7
|
+
from skimage.morphology import binary_erosion, footprints
|
8
|
+
|
9
|
+
|
10
|
+
def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
    """Compute a per-pixel, per-date score from a classification band.

    The first band of the input is read as a classification layer (values 3,
    8, 9 and 10 are treated as cloud; presumably the Sentinel-2 SCL legend,
    to be confirmed against the caller). Three partial scores are computed
    and combined as a weighted average:

    * distance-to-cloud score (weight 1): logistic function of the chamfer
      distance to the nearest (eroded) cloud pixel, capped at 50 px;
    * day-of-month score (weight 0.8): Gaussian density centred on day 15
      with a standard deviation of 5 days;
    * coverage score (weight 0.5): 1 minus the fraction of cloud pixels in
      the processed array for that date.

    Pixels whose input value is 0 get a final score of 0.

    Parameters
    ----------
    cube : XarrayDataCube
        The input cube, with dimensions ``t``, ``bands``, ``y`` and ``x``.
    context : dict
        Not used by this UDF.

    Returns
    -------
    XarrayDataCube
        The score, laid out as (``t``, ``bands``, ``y``, ``x``) and carrying
        the band coordinate of the input.
    """
    cube_array: xr.DataArray = cube.get_array()
    cube_array = cube_array.transpose("t", "bands", "y", "x")

    # Boolean cloud map (t, y, x) from the first band: values 8, 9, 10 or 3.
    clouds = np.logical_or(
        np.logical_and(cube_array < 11, cube_array >= 8), cube_array == 3
    ).isel(bands=0)

    # Weights of, in order: distance-to-cloud, date and coverage scores.
    weights = [1, 0.8, 0.5]

    # Calculate the date score (based on the day of the month, not the day of year)
    times = cube_array.t.dt.day.values  # returns day of the month for each date
    sigma = 5
    mu = 15
    # Gaussian density: its peak value is 1 / (sigma * sqrt(2 * pi)), not 1.
    score_doy = (
        1
        / (sigma * math.sqrt(2 * math.pi))
        * np.exp(-0.5 * ((times - mu) / sigma) ** 2)
    )
    # One value per date, repeated over the whole (y, x) plane.
    score_doy = np.broadcast_to(
        score_doy[:, np.newaxis, np.newaxis],
        [cube_array.sizes["t"], cube_array.sizes["y"], cube_array.sizes["x"]],
    )

    # Calculate the Distance To Cloud score
    # Erode
    # Source: https://github.com/dzanaga/satio-pc/blob/e5fc46c0c14bba77e01dca409cf431e7ef22c077/src/satio_pc/preprocessing/clouds.py#L127
    e = footprints.disk(3)

    # Define a function to apply binary erosion; the result is inverted, so it
    # is True everywhere except on the cloud pixels that survive the erosion.
    def erode(image, selem):
        return ~binary_erosion(image, selem)

    # Use apply_ufunc to apply the erosion operation
    eroded = xr.apply_ufunc(
        erode,  # function to apply
        clouds,  # input DataArray
        input_core_dims=[["y", "x"]],  # dimensions over which to apply function
        output_core_dims=[["y", "x"]],  # dimensions of the output
        vectorize=True,  # vectorize the function over non-core dimensions
        dask="parallelized",  # enable dask parallelization
        output_dtypes=[np.int32],  # data type of the output
        kwargs={"selem": e},  # additional keyword arguments to pass to erode
    )

    # Distance to cloud: chamfer distance transform of the inverted, eroded mask
    d_min = 0
    d_req = 50
    d = xr.apply_ufunc(
        distance_transform_cdt,
        eroded,
        input_core_dims=[["y", "x"]],
        output_core_dims=[["y", "x"]],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[np.int32],
    )
    # Distances of -1 are replaced by the maximum distance.
    # NOTE(review): presumably -1 is what the transform yields when an image
    # has no cloud pixel at all; confirm against the scipy documentation.
    d = xr.where(d == -1, d_req, d)
    # Logistic curve centred on (d_req - d_min) / 2, evaluated on the capped distance.
    score_clouds = 1 / (1 + np.exp(-0.2 * (np.minimum(d, d_req) - (d_req - d_min) / 2)))

    # Calculate the Coverage score: 1 - fraction of cloud pixels, per date
    score_cov = 1 - clouds.sum(dim="x").sum(dim="y") / (
        cube_array.sizes["x"] * cube_array.sizes["y"]
    )
    # One value per date, repeated over the whole (y, x) plane.
    score_cov = np.broadcast_to(
        score_cov.values[:, np.newaxis, np.newaxis],
        [cube_array.sizes["t"], cube_array.sizes["y"], cube_array.sizes["x"]],
    )

    # Final score is weighted average
    score = (
        weights[0] * score_clouds + weights[1] * score_doy + weights[2] * score_cov
    ) / sum(weights)
    # Pixels whose first-band value is 0 are given a score of 0.
    score = np.where(cube_array.values[:, 0, :, :] == 0, 0, score)

    # Wrap the raw (t, y, x) array with the coordinates of the input.
    score_da = xr.DataArray(
        score,
        coords={
            "t": cube_array.coords["t"],
            "y": cube_array.coords["y"],
            "x": cube_array.coords["x"],
        },
        dims=["t", "y", "x"],
    )

    # Re-introduce the band dimension, reusing the band labels of the input.
    score_da = score_da.expand_dims(
        dim={
            "bands": cube_array.coords["bands"],
        },
    )

    score_da = score_da.transpose("t", "bands", "y", "x")

    return XarrayDataCube(score_da)
|