openeo-gfmap 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. openeo_gfmap/__init__.py +23 -0
  2. openeo_gfmap/backend.py +122 -0
  3. openeo_gfmap/features/__init__.py +17 -0
  4. openeo_gfmap/features/feature_extractor.py +389 -0
  5. openeo_gfmap/fetching/__init__.py +21 -0
  6. openeo_gfmap/fetching/commons.py +213 -0
  7. openeo_gfmap/fetching/fetching.py +98 -0
  8. openeo_gfmap/fetching/generic.py +165 -0
  9. openeo_gfmap/fetching/meteo.py +126 -0
  10. openeo_gfmap/fetching/s1.py +195 -0
  11. openeo_gfmap/fetching/s2.py +236 -0
  12. openeo_gfmap/inference/__init__.py +3 -0
  13. openeo_gfmap/inference/model_inference.py +347 -0
  14. openeo_gfmap/manager/__init__.py +31 -0
  15. openeo_gfmap/manager/job_manager.py +469 -0
  16. openeo_gfmap/manager/job_splitters.py +144 -0
  17. openeo_gfmap/metadata.py +24 -0
  18. openeo_gfmap/preprocessing/__init__.py +22 -0
  19. openeo_gfmap/preprocessing/cloudmasking.py +268 -0
  20. openeo_gfmap/preprocessing/compositing.py +74 -0
  21. openeo_gfmap/preprocessing/interpolation.py +12 -0
  22. openeo_gfmap/preprocessing/sar.py +64 -0
  23. openeo_gfmap/preprocessing/scaling.py +65 -0
  24. openeo_gfmap/preprocessing/udf_cldmask.py +36 -0
  25. openeo_gfmap/preprocessing/udf_rank.py +37 -0
  26. openeo_gfmap/preprocessing/udf_score.py +103 -0
  27. openeo_gfmap/spatial.py +53 -0
  28. openeo_gfmap/stac/__init__.py +2 -0
  29. openeo_gfmap/stac/constants.py +51 -0
  30. openeo_gfmap/temporal.py +22 -0
  31. openeo_gfmap/utils/__init__.py +23 -0
  32. openeo_gfmap/utils/build_df.py +48 -0
  33. openeo_gfmap/utils/catalogue.py +248 -0
  34. openeo_gfmap/utils/intervals.py +64 -0
  35. openeo_gfmap/utils/netcdf.py +25 -0
  36. openeo_gfmap/utils/tile_processing.py +64 -0
  37. openeo_gfmap-0.1.0.dist-info/METADATA +57 -0
  38. openeo_gfmap-0.1.0.dist-info/RECORD +40 -0
  39. openeo_gfmap-0.1.0.dist-info/WHEEL +4 -0
  40. openeo_gfmap-0.1.0.dist-info/licenses/LICENSE +201 -0
openeo_gfmap/preprocessing/cloudmasking.py
@@ -0,0 +1,268 @@
+ """Different cloud masking strategies for OpenEO datacubes."""
+
+ from pathlib import Path
+ from typing import Union
+
+ import openeo
+ from openeo.processes import if_, is_nan
+
+ SCL_HARMONIZED_NAME: str = "S2-L2A-SCL"
+ BAPSCORE_HARMONIZED_NAME: str = "S2-L2A-BAPSCORE"
+
+
+ def mask_scl_dilation(cube: openeo.DataCube, **params: dict) -> openeo.DataCube:
+     """Creates a mask from the SCL band, dilates it and applies the mask to the
+     optical bands of the datacube. Other bands such as DEM, SAR and METEO are
+     not affected by the mask.
+     """
+     # Assert that the SCL band is present
+     assert (
+         SCL_HARMONIZED_NAME in cube.metadata.band_names
+     ), f"The SCL band ({SCL_HARMONIZED_NAME}) is not present in the datacube."
+
+     kernel1_size = params.get("kernel1_size", 17)
+     kernel2_size = params.get("kernel2_size", 3)
+     erosion_kernel_size = params.get("erosion_kernel_size", 3)
+
+     # TODO adapt the dilation size given the mask size in meters
+     # TODO check how to get the spatial resolution from the cube metadata
+
+     # Only apply the filtering to the optical part of the cube
+     optical_cube = cube.filter_bands(
+         bands=list(filter(lambda band: band.startswith("S2"), cube.metadata.band_names))
+     )
+
+     nonoptical_cube = cube.filter_bands(
+         bands=list(
+             filter(lambda band: not band.startswith("S2"), cube.metadata.band_names)
+         )
+     )
+
+     optical_cube = optical_cube.process(
+         "mask_scl_dilation",
+         data=optical_cube,
+         scl_band_name=SCL_HARMONIZED_NAME,
+         kernel1_size=kernel1_size,
+         kernel2_size=kernel2_size,
+         mask1_values=[2, 4, 5, 6, 7],
+         mask2_values=[3, 8, 9, 10, 11],
+         erosion_kernel_size=erosion_kernel_size,
+     )
+
+     # Do not merge if the non-optical part of the cube is empty
+     if len(nonoptical_cube.metadata.band_names) == 0:
+         return optical_cube
+
+     return optical_cube.merge_cubes(nonoptical_cube)
+
+
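For illustration, a minimal usage sketch of `mask_scl_dilation` (not part of the package: the backend URL, collection, extents and band renaming are assumptions, and the `mask_scl_dilation` process itself is backend-specific):

    import openeo

    connection = openeo.connect("openeo.dataspace.copernicus.eu").authenticate_oidc()
    cube = connection.load_collection(
        "SENTINEL2_L2A",
        spatial_extent={"west": 5.0, "south": 51.2, "east": 5.1, "north": 51.3},
        temporal_extent=["2022-06-01", "2022-08-31"],
        bands=["B04", "B08", "SCL"],
    ).rename_labels("bands", ["S2-L2A-B04", "S2-L2A-B08", "S2-L2A-SCL"])

    masked = mask_scl_dilation(cube, kernel1_size=17, kernel2_size=3)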
+ def get_bap_score(cube: openeo.DataCube, **params: dict) -> openeo.DataCube:
+     """Calculates the Best Available Pixel (BAP) score for the given datacube,
+     computed from the SCL layer.
+
+     The BAP score is calculated using a UDF, which gives a lot of flexibility
+     in the calculation methodology. The BAP score is a weighted average of
+     three scores:
+     * Distance-to-Cloud Score: Pixels that are clouds are given score 0.
+       Pixels that are more than 50 pixels - calculated with the Manhattan
+       distance measure - away from a cloud pixel are given score 1. The pixels
+       in between are given a score versus distance-to-cloud that follows a
+       sigmoid shape.
+     * Coverage Score: Per date, the percentage of all pixels that are classified
+       as a cloud over the entire spatial extent is calculated. The Coverage
+       Score is then equal to 1 - the cloud percentage.
+     * Date Score: In order to favor pixels that are observed in the middle of a
+       month, a date score is calculated, which follows a Gaussian shape. I.e.
+       the largest scores are given for days in the middle of the month, the
+       lowest scores are given for days at the beginning and end of the month.
+
+     The final BAP score is a weighted average of the three aforementioned
+     scores. The weights are 1, 0.5 and 0.8 for the Distance-to-Cloud, Coverage
+     and Date Score respectively.
+
+     Parameters
+     ----------
+     cube : openeo.DataCube
+         The datacube to compute the BAP score from, only the SCL band is used.
+     params : dict
+         Additional parameters to add to this routine.
+         * `apply_scl_dilation`: Whether to apply dilation to the SCL mask before computing the BAP.
+         * `kernel1_size`: The size of the first kernel used for the dilation of the SCL mask.
+         * `kernel2_size`: The size of the second kernel used for the dilation of the SCL mask.
+         * `erosion_kernel_size`: The size of the kernel used for the erosion of the SCL mask.
+
+     Returns
+     -------
+     openeo.DataCube
+         A 4D datacube containing the BAP score as a band named 'S2-L2A-BAPSCORE'.
+     """
+     udf_path = Path(__file__).parent / "udf_score.py"
+
+     # Select the SCL band
+     scl_cube = cube.filter_bands([SCL_HARMONIZED_NAME])
+
+     if params.get("apply_scl_dilation", False):
+         kernel1_size = params.get("kernel1_size", 17)
+         kernel2_size = params.get("kernel2_size", 3)
+         erosion_kernel_size = params.get("erosion_kernel_size", 3)
+
+         scl_cube = scl_cube.process(
+             "to_scl_dilation_mask",
+             data=scl_cube,
+             scl_band_name=SCL_HARMONIZED_NAME,
+             kernel1_size=kernel1_size,
+             kernel2_size=kernel2_size,
+             mask1_values=[2, 4, 5, 6, 7],
+             mask2_values=[3, 8, 9, 10, 11],
+             erosion_kernel_size=erosion_kernel_size,
+         )
+
+     # Replace NaN with 0 to avoid issues in the UDF
+     scl_cube = scl_cube.apply(lambda x: if_(is_nan(x), 0, x))
+
+     score = scl_cube.apply_neighborhood(
+         process=openeo.UDF.from_file(str(udf_path)),
+         size=[
+             {"dimension": "x", "unit": "px", "value": 256},
+             {"dimension": "y", "unit": "px", "value": 256},
+         ],
+         overlap=[
+             {"dimension": "x", "unit": "px", "value": 16},
+             {"dimension": "y", "unit": "px", "value": 16},
+         ],
+     )
+
+     score = score.rename_labels("bands", [BAPSCORE_HARMONIZED_NAME])
+
+     # Return the BAP score as a single-band cube
+     return score
+
+
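To make the weighting concrete, a hand-computed sketch with made-up per-pixel scores (weights as documented above: 1 for distance-to-cloud, 0.5 for coverage, 0.8 for date):

    score_dtc, score_cov, score_date = 0.9, 0.7, 0.2  # hypothetical values
    weights = [1.0, 0.5, 0.8]

    bap = (
        weights[0] * score_dtc + weights[1] * score_cov + weights[2] * score_date
    ) / sum(weights)
    print(round(bap, 3))  # 0.613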
+ def get_bap_mask(cube: openeo.DataCube, period: Union[str, list], **params: dict):
+     """Computes the BAP score and builds a mask that keeps, for each pixel,
+     the best-scoring observation within each given time period. This method
+     therefore performs both cloud masking and a form of compositing.
+
+     The BAP mask is computed using the method `get_bap_score`, from which, for
+     every pixel, the observation with the maximum score within the given
+     period is selected. The resulting array indicates, for each optical
+     observation, whether the pixel should be loaded or not, allowing for
+     significant cost optimization.
+
+     Parameters
+     ----------
+     cube : openeo.DataCube
+         The datacube to be processed.
+     period : Union[str, list]
+         A string period (e.g. "month") or a list of date intervals (in
+         YYYY-mm-dd format) to be used as the temporal period to compute the
+         BAP score.
+     params : dict
+         Additional parameters, not used yet.
+
+     Returns
+     -------
+     openeo.DataCube
+         The BAP mask as a single-band datacube named 'S2-L2A-BAPMASK'.
+     """
+     # Check if the S2-L2A-SCL band is present in the datacube
+     assert (
+         SCL_HARMONIZED_NAME in cube.metadata.band_names
+     ), f"The {SCL_HARMONIZED_NAME} band is not present in the datacube."
+
+     bap_score = get_bap_score(cube, **params)
+
+     if isinstance(period, str):
+
+         def max_score_selection(score):
+             max_score = score.max()
+             return score.array_apply(lambda x: x != max_score)
+
+         rank_mask = bap_score.apply_neighborhood(
+             max_score_selection,
+             size=[
+                 {"dimension": "x", "unit": "px", "value": 1},
+                 {"dimension": "y", "unit": "px", "value": 1},
+                 {"dimension": "t", "value": period},
+             ],
+             overlap=[],
+         )
+     elif isinstance(period, list):
+         udf_path = Path(__file__).parent / "udf_rank.py"
+         rank_mask = bap_score.apply_neighborhood(
+             process=openeo.UDF.from_file(str(udf_path), context={"intervals": period}),
+             size=[
+                 {"dimension": "x", "unit": "px", "value": 256},
+                 {"dimension": "y", "unit": "px", "value": 256},
+             ],
+             overlap=[],
+         )
+     else:
+         raise ValueError(
+             f"'period' must be a string or a list of dates (in YYYY-mm-dd format), got {period}."
+         )
+
+     return rank_mask.rename_labels("bands", ["S2-L2A-BAPMASK"])
+
+
+ def bap_masking(cube: openeo.DataCube, period: Union[str, list], **params: dict):
+     """Computes the BAP mask as described in `get_bap_mask` and applies it to
+     the optical part of the cube.
+
+     Parameters
+     ----------
+     cube : openeo.DataCube
+         The datacube to be processed.
+     period : Union[str, list]
+         A string period (e.g. "month") or a list of date intervals (in
+         YYYY-mm-dd format) to be used as the temporal period to compute the
+         BAP score.
+     params : dict
+         Additional parameters, not used yet.
+
+     Returns
+     -------
+     openeo.DataCube
+         The datacube with the BAP mask applied.
+     """
+     optical_cube = cube.filter_bands(
+         bands=list(filter(lambda band: band.startswith("S2"), cube.metadata.band_names))
+     )
+
+     nonoptical_cube = cube.filter_bands(
+         bands=list(
+             filter(lambda band: not band.startswith("S2"), cube.metadata.band_names)
+         )
+     )
+
+     rank_mask = get_bap_mask(optical_cube, period, **params)
+
+     optical_cube = optical_cube.mask(rank_mask.resample_cube_spatial(cube))
+
+     # Do not merge if the non-optical part of the cube is empty!
+     if len(nonoptical_cube.metadata.band_names) == 0:
+         return optical_cube
+
+     return optical_cube.merge_cubes(nonoptical_cube)
+
+
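Putting the pieces together, a sketch of monthly BAP masking followed by a monthly median composite (assuming `cube` is a harmonized Sentinel-2 cube including `S2-L2A-SCL`, as in the earlier sketch; `median_compositing` comes from `compositing.py` below):

    masked = bap_masking(cube, period="month", apply_scl_dilation=True)
    composite = median_compositing(masked, period="month")
    composite.download("composite.nc")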
+ def cloudmask_percentage(
+     cube: openeo.DataCube, percentage: float = 0.95
+ ) -> openeo.DataCube:
+     """Computes a cloud mask that, per observation, either covers the full
+     spatial extent or is empty. The percentage of HIGH_CLOUD_PROBABILITY
+     pixels in the SCL band is computed per observation; if it exceeds the
+     given threshold, the mask covers the whole observation, otherwise the
+     observation is left unmasked.
+     """
+     non_scl_cube = cube.filter_bands(
+         bands=list(filter(lambda band: "SCL" not in band, cube.metadata.band_names))
+     )
+
+     scl_cube = cube.filter_bands(["SCL"])
+
+     # Resolve the packaged UDF (udf_cldmask.py) relative to this module and
+     # forward the threshold through the UDF context
+     udf_path = Path(__file__).parent / "udf_cldmask.py"
+
+     cloud_mask = scl_cube.apply_neighborhood(
+         process=openeo.UDF.from_file(str(udf_path), context={"percentage": percentage}),
+         size=[
+             {"dimension": "x", "unit": "px", "value": 1024},
+             {"dimension": "y", "unit": "px", "value": 1024},
+             {"dimension": "t", "value": 1},
+         ],
+         overlap=[],
+     )
+
+     non_scl_cube = non_scl_cube.mask(cloud_mask.resample_cube_spatial(cube))
+
+     return non_scl_cube.merge_cubes(scl_cube)
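A short usage sketch (reusing the `connection` from the first sketch; note that, unlike the functions above, this one filters on a plain `SCL` band name):

    l2a = connection.load_collection(
        "SENTINEL2_L2A",
        temporal_extent=["2022-06-01", "2022-06-30"],
        bands=["B04", "B08", "SCL"],
    )

    # Observations that are more than 95% high-probability cloud are fully masked.
    cleaned = cloudmask_percentage(l2a, percentage=0.95)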
openeo_gfmap/preprocessing/compositing.py
@@ -0,0 +1,74 @@
+ """Temporal compositing, or temporal aggregation, is a method to increase the
+ quality of data within timesteps by reducing the temporal resolution of a time
+ series of satellite images.
+ """
+
+ from typing import Union
+
+ import openeo
+
+
+ def median_compositing(
+     cube: openeo.DataCube, period: Union[str, list]
+ ) -> openeo.DataCube:
+     """Perform median compositing on the given datacube."""
+     if isinstance(period, str):
+         return cube.aggregate_temporal_period(
+             period=period, reducer="median", dimension="t"
+         )
+     elif isinstance(period, list):
+         return cube.aggregate_temporal(
+             intervals=period, reducer="median", dimension="t"
+         )
+
+
+ def mean_compositing(
+     cube: openeo.DataCube, period: Union[str, list]
+ ) -> openeo.DataCube:
+     """Perform mean compositing on the given datacube."""
+     if isinstance(period, str):
+         return cube.aggregate_temporal_period(
+             period=period, reducer="mean", dimension="t"
+         )
+     elif isinstance(period, list):
+         return cube.aggregate_temporal(intervals=period, reducer="mean", dimension="t")
+
+
+ def sum_compositing(cube: openeo.DataCube, period: Union[str, list]) -> openeo.DataCube:
+     """Perform sum compositing on the given datacube."""
+     if isinstance(period, str):
+         return cube.aggregate_temporal_period(
+             period=period, reducer="sum", dimension="t"
+         )
+     elif isinstance(period, list):
+         return cube.aggregate_temporal(intervals=period, reducer="sum", dimension="t")
+
+
+ def max_ndvi_compositing(cube: openeo.DataCube, period: str) -> openeo.DataCube:
+     """Perform compositing by selecting, within each compositing window, the
+     observation with the highest NDVI value."""
+
+     def max_ndvi_selection(ndvi: openeo.DataCube):
+         max_ndvi = ndvi.max()
+         return ndvi.array_apply(lambda x: x != max_ndvi)
+
+     if isinstance(period, str):
+         ndvi = cube.ndvi(nir="S2-L2A-B08", red="S2-L2A-B04")
+
+         rank_mask = ndvi.apply_neighborhood(
+             max_ndvi_selection,
+             size=[
+                 {"dimension": "x", "unit": "px", "value": 1},
+                 {"dimension": "y", "unit": "px", "value": 1},
+                 {"dimension": "t", "value": period},
+             ],
+             overlap=[],
+         )
+
+         cube = cube.mask(mask=rank_mask).aggregate_temporal_period(period, "first")
+
+     else:
+         raise ValueError(
+             "Custom temporal intervals are not yet supported for max NDVI compositing."
+         )
+     return cube
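When `period` is a list it is forwarded to `aggregate_temporal`, which takes explicit intervals. A sketch of hand-built two-month `[start, end)` intervals, reusing `masked` from the earlier BAP sketch (interval semantics follow the openEO `aggregate_temporal` process):

    intervals = [
        ["2022-01-01", "2022-03-01"],
        ["2022-03-01", "2022-05-01"],
        ["2022-05-01", "2022-07-01"],
    ]

    composite = median_compositing(masked, period=intervals)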
openeo_gfmap/preprocessing/interpolation.py
@@ -0,0 +1,12 @@
+ """Utilities to perform interpolation on missing values using the temporal
+ dimension.
+ """
+
+ import openeo
+
+
+ def linear_interpolation(
+     cube: openeo.DataCube,
+ ) -> openeo.DataCube:
+     """Perform linear interpolation on the given datacube."""
+     return cube.apply_dimension(dimension="t", process="array_interpolate_linear")
openeo_gfmap/preprocessing/sar.py
@@ -0,0 +1,64 @@
+ """Routines to pre-process SAR signals."""
+
+ import openeo
+ from openeo.processes import array_create, if_, is_nodata, power
+
+ from openeo_gfmap import Backend, BackendContext
+
+
+ def compress_backscatter_uint16(
+     backend_context: BackendContext, cube: openeo.DataCube
+ ) -> openeo.DataCube:
+     """
+     Scales the bands from float32 power values to uint16 for memory optimization. The scaling
+     casts the values from power to decibels and applies a linear scaling from 0 to 65534.
+
+     The resulting datacube has a uint16 memory representation, which is an optimization
+     before passing the data through any UDFs.
+
+     Parameters
+     ----------
+     backend_context : BackendContext
+         The backend context to fetch the backend name.
+     cube : openeo.DataCube
+         The datacube whose backscatter values are to be compressed.
+
+     Returns
+     -------
+     openeo.DataCube
+         The datacube with the backscatter values compressed to uint16.
+     """
+     backend = backend_context.backend
+
+     # Additional check related to problematic values present in creodias collections.
+     # https://github.com/Open-EO/openeo-geopyspark-driver/issues/293
+     if backend in [Backend.CDSE, Backend.CDSE_STAGING, Backend.FED]:
+         cube = cube.apply_dimension(
+             dimension="bands",
+             process=lambda x: array_create(
+                 [
+                     if_(
+                         is_nodata(x[0]),
+                         1,
+                         power(base=10, p=(10.0 * x[0].log(base=10) + 83.0) / 20.0),
+                     ),
+                     if_(
+                         is_nodata(x[1]),
+                         1,
+                         power(base=10, p=(10.0 * x[1].log(base=10) + 83.0) / 20.0),
+                     ),
+                 ]
+             ),
+         )
+     else:
+         cube = cube.apply_dimension(
+             dimension="bands",
+             process=lambda x: array_create(
+                 [
+                     power(base=10, p=(10.0 * x[0].log(base=10) + 83.0) / 20.0),
+                     power(base=10, p=(10.0 * x[1].log(base=10) + 83.0) / 20.0),
+                 ]
+             ),
+         )
+
+     # Change the data type to uint16 for optimization purposes
+     return cube.linear_scale_range(1, 65534, 1, 65534)
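The per-band expression maps a linear power value sigma0 to 10 ** ((10 * log10(sigma0) + 83) / 20). A quick NumPy check of the mapping at typical backscatter levels (a verification sketch, not package code):

    import numpy as np

    sigma0 = np.array([0.0001, 0.01, 1.0])   # linear power, i.e. -40, -20 and 0 dB
    db = 10.0 * np.log10(sigma0)             # power to decibels
    encoded = 10.0 ** ((db + 83.0) / 20.0)   # the compression expression

    print(np.round(encoded, 1))              # [  141.3  1412.5 14125.4]
    # The uint16 ceiling of 65534 is reached around 20 * log10(65534) - 83 = 13.3 dB.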
openeo_gfmap/preprocessing/scaling.py
@@ -0,0 +1,65 @@
+ """Scaling and compressing methods for datacubes."""
+
+ import openeo
+
+
+ def _compress(
+     cube: openeo.DataCube,
+     min_val: int,
+     max_val: int,
+     alpha: float,
+     beta: float,
+ ):
+     if (
+         alpha != 1.0 or beta != 0.0
+     ):  # Avoid adding a node to the process graph if scaling is not necessary
+         cube = (cube * alpha) + beta
+
+     return cube.linear_scale_range(min_val, max_val, min_val, max_val)
+
+
+ def compress_uint16(
+     cube: openeo.DataCube, alpha: float = 1.0, beta: float = 0.0
+ ) -> openeo.DataCube:
+     """Scales the data linearly using the formula `output = (input * alpha) + beta` and
+     compresses values from float32 to uint16 for memory optimization.
+
+     Parameters
+     ----------
+     cube : openeo.DataCube
+         The input datacube to compress, only meteo data should be present.
+     alpha : float, optional (default=1.0)
+         The scaling factor. Values in the input datacube are multiplied by this coefficient.
+     beta : float, optional (default=0.0)
+         The offset. This value is added to the values in the input datacube.
+
+     Returns
+     -------
+     cube : openeo.DataCube
+         The datacube with the data linearly scaled and compressed to uint16.
+     """
+     return _compress(cube, 0, 65534, alpha, beta)
+
+
+ def compress_uint8(
+     cube: openeo.DataCube, alpha: float = 1.0, beta: float = 0.0
+ ) -> openeo.DataCube:
+     """
+     Scales the data linearly using the formula `output = (input * alpha) + beta` and
+     compresses values from float32 to uint8 for memory optimization.
+
+     Parameters
+     ----------
+     cube : openeo.DataCube
+         The input datacube to compress, only meteo data should be present.
+     alpha : float, optional (default=1.0)
+         The scaling factor. Values in the input datacube are multiplied by this coefficient.
+     beta : float, optional (default=0.0)
+         The offset. This value is added to the values in the input datacube.
+
+     Returns
+     -------
+     cube : openeo.DataCube
+         The datacube with the data linearly scaled and compressed to uint8.
+     """
+     return _compress(cube, 0, 253, alpha, beta)
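Choosing `alpha` and `beta` amounts to solving `output = input * alpha + beta` for the desired target range. For instance, to pack values known to lie in [-1, 1] into the uint16 range (an illustrative calculation, not package code):

    # Map [-1, 1] onto [0, 65534]:
    #   alpha = (65534 - 0) / (1 - (-1)) = 32767
    #   beta  = 0 - (-1) * alpha         = 32767
    compressed = compress_uint16(cube, alpha=32767.0, beta=32767.0)
    # -1.0 -> 0, 0.0 -> 32767, 1.0 -> 65534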
openeo_gfmap/preprocessing/udf_cldmask.py
@@ -0,0 +1,36 @@
+ import numpy as np
+ import xarray as xr
+ from openeo.udf import XarrayDataCube
+
+
+ def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
+     """
+     Computes a cloud mask that covers either a full observation or nothing,
+     depending on the percentage of high-probability cloud pixels. If that
+     percentage exceeds the threshold (default 95%), the returned mask covers
+     the whole observation; otherwise the mask is left empty.
+     """
+     threshold = context.get("percentage", 0.95)
+
+     array = cube.get_array().transpose("t", "bands", "y", "x")
+
+     output_array = np.zeros(
+         shape=(array.shape[0], 1, array.shape[2], array.shape[3]), dtype=np.uint8
+     )
+
+     for i in range(array.shape[0]):
+         # SCL value 9 is HIGH_CLOUD_PROBABILITY
+         high_proba_count = ((array[i] == 9) * 1).sum()
+         high_proba_percentage = high_proba_count / (array.shape[2] * array.shape[3])
+
+         if high_proba_percentage > threshold:
+             output_array[i] = 1
+
+     output_array = xr.DataArray(
+         output_array,
+         dims=["t", "bands", "y", "x"],
+         coords={
+             "t": array.t,
+             "bands": ["mask"],
+             "y": array.y,
+             "x": array.x,
+         },
+     )
+
+     return XarrayDataCube(output_array)
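UDFs of this shape can be exercised locally with a synthetic cube; a small test sketch (`apply_datacube` is the function above):

    import numpy as np
    import xarray as xr
    from openeo.udf import XarrayDataCube

    # One fully cloudy observation (SCL == 9 everywhere) and one clear observation.
    scl = np.zeros((2, 1, 4, 4), dtype=np.uint8)
    scl[0] = 9
    data = xr.DataArray(
        scl,
        dims=["t", "bands", "y", "x"],
        coords={"t": [0, 1], "bands": ["SCL"], "y": range(4), "x": range(4)},
    )

    result = apply_datacube(XarrayDataCube(data), context={"percentage": 0.95}).get_array()
    print(int(result.sel(t=0).max()), int(result.sel(t=1).max()))  # 1 0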
openeo_gfmap/preprocessing/udf_rank.py
@@ -0,0 +1,37 @@
+ import numpy as np
+ import xarray as xr
+ from openeo.udf import XarrayDataCube
+
+
+ def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
+     """For a cube containing the BAP score and a given list of intervals,
+     create a binary mask that, for each pixel, is True if the BAP score is
+     not the best within its interval, so that masking removes all but the
+     best observation. The output has the same dimensions as the input, but
+     only contains binary values.
+
+     This UDF does not yet support string periods such as "month",
+     "dekad", etc.
+     """
+     # First check if the intervals are defined in the context
+     intervals = context.get("intervals", None)
+     array = cube.get_array().transpose("t", "bands", "y", "x")
+
+     bap_score = array.sel(bands=["S2-L2A-BAPSCORE"])
+
+     def select_maximum(score: xr.DataArray):
+         max_score = score.max(dim="t")
+         # True marks observations to be masked out, matching the polarity of
+         # the string-period branch in cloudmasking.get_bap_mask
+         return score != max_score
+
+     if isinstance(intervals, str):
+         raise NotImplementedError(
+             "Period as string is not implemented yet, please provide a list of interval tuples."
+         )
+     elif isinstance(intervals, list):
+         # Convert the YYYY-mm-dd interval starts to datetime64 bin edges
+         time_bins = [np.datetime64(interval[0]) for interval in intervals]
+
+         rank_mask = bap_score.groupby_bins("t", bins=time_bins).map(select_maximum)
+     else:
+         raise ValueError("Intervals are not defined in the UDF context. Cannot run it.")
+
+     return XarrayDataCube(rank_mask)
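The per-interval selection reduces to comparing each score against the interval maximum along `t`; a standalone xarray illustration of that comparison (a sketch, not calling the UDF):

    import numpy as np
    import xarray as xr

    score = xr.DataArray(
        np.array([0.2, 0.8, 0.5]),
        dims=["t"],
        coords={"t": np.array(["2022-01-05", "2022-01-20", "2022-01-28"], dtype="datetime64[ns]")},
    )

    to_mask = score != score.max(dim="t")
    print(to_mask.values)  # [ True False  True] -> everything but the best observation is masked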
openeo_gfmap/preprocessing/udf_score.py
@@ -0,0 +1,103 @@
+ import math
+
+ import numpy as np
+ import xarray as xr
+ from openeo.udf import XarrayDataCube
+ from scipy.ndimage import distance_transform_cdt
+ from skimage.morphology import binary_erosion, footprints
+
+
+ def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
+     cube_array: xr.DataArray = cube.get_array()
+     cube_array = cube_array.transpose("t", "bands", "y", "x")
+
+     # Cloudy pixels: SCL classes 3 (cloud shadow), 8 and 9 (cloud medium/high
+     # probability) and 10 (thin cirrus)
+     clouds = np.logical_or(
+         np.logical_and(cube_array < 11, cube_array >= 8), cube_array == 3
+     ).isel(bands=0)
+
+     # Weights of the distance-to-cloud, date and coverage scores respectively
+     weights = [1, 0.8, 0.5]
+
+     # Calculate the date score (Gaussian around the middle of the month)
+     times = cube_array.t.dt.day.values  # returns the day of the month for each date
+     sigma = 5
+     mu = 15
+     score_doy = (
+         1
+         / (sigma * math.sqrt(2 * math.pi))
+         * np.exp(-0.5 * ((times - mu) / sigma) ** 2)
+     )
+     score_doy = np.broadcast_to(
+         score_doy[:, np.newaxis, np.newaxis],
+         [cube_array.sizes["t"], cube_array.sizes["y"], cube_array.sizes["x"]],
+     )
+
+     # Calculate the distance-to-cloud score
+     # Erode
+     # Source: https://github.com/dzanaga/satio-pc/blob/e5fc46c0c14bba77e01dca409cf431e7ef22c077/src/satio_pc/preprocessing/clouds.py#L127
+     e = footprints.disk(3)
+
+     # Define a function to apply binary erosion
+     def erode(image, selem):
+         return ~binary_erosion(image, selem)
+
+     # Use apply_ufunc to apply the erosion operation
+     eroded = xr.apply_ufunc(
+         erode,  # function to apply
+         clouds,  # input DataArray
+         input_core_dims=[["y", "x"]],  # dimensions over which to apply function
+         output_core_dims=[["y", "x"]],  # dimensions of the output
+         vectorize=True,  # vectorize the function over non-core dimensions
+         dask="parallelized",  # enable dask parallelization
+         output_dtypes=[np.int32],  # data type of the output
+         kwargs={"selem": e},  # additional keyword arguments to pass to erode
+     )
+
+     # Distance to cloud = dilation
+     d_min = 0
+     d_req = 50
+     d = xr.apply_ufunc(
+         distance_transform_cdt,
+         eroded,
+         input_core_dims=[["y", "x"]],
+         output_core_dims=[["y", "x"]],
+         vectorize=True,
+         dask="parallelized",
+         output_dtypes=[np.int32],
+     )
+     d = xr.where(d == -1, d_req, d)
+     score_clouds = 1 / (
+         1 + np.exp(-0.2 * (np.minimum(d, d_req) - (d_req - d_min) / 2))
+     )
+
+     # Calculate the coverage score
+     score_cov = 1 - clouds.sum(dim="x").sum(dim="y") / (
+         cube_array.sizes["x"] * cube_array.sizes["y"]
+     )
+     score_cov = np.broadcast_to(
+         score_cov.values[:, np.newaxis, np.newaxis],
+         [cube_array.sizes["t"], cube_array.sizes["y"], cube_array.sizes["x"]],
+     )
+
+     # Final score is the weighted average of the three scores
+     score = (
+         weights[0] * score_clouds + weights[1] * score_doy + weights[2] * score_cov
+     ) / sum(weights)
+     score = np.where(cube_array.values[:, 0, :, :] == 0, 0, score)
+
+     score_da = xr.DataArray(
+         score,
+         coords={
+             "t": cube_array.coords["t"],
+             "y": cube_array.coords["y"],
+             "x": cube_array.coords["x"],
+         },
+         dims=["t", "y", "x"],
+     )
+
+     score_da = score_da.expand_dims(
+         dim={
+             "bands": cube_array.coords["bands"],
+         },
+     )
+
+     score_da = score_da.transpose("t", "bands", "y", "x")
+
+     return XarrayDataCube(score_da)
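With d_min = 0 and d_req = 50, the distance-to-cloud score above is the logistic curve 1 / (1 + exp(-0.2 * (min(d, 50) - 25))); a quick check of its endpoints (a verification sketch, not package code):

    import numpy as np

    d = np.array([0, 25, 50])  # chamfer distance to the nearest cloud, in pixels
    score = 1 / (1 + np.exp(-0.2 * (np.minimum(d, 50) - 25)))
    print(np.round(score, 4))  # [0.0067 0.5    0.9933]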