openeo-gfmap 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openeo_gfmap/__init__.py +23 -0
- openeo_gfmap/backend.py +122 -0
- openeo_gfmap/features/__init__.py +17 -0
- openeo_gfmap/features/feature_extractor.py +389 -0
- openeo_gfmap/fetching/__init__.py +21 -0
- openeo_gfmap/fetching/commons.py +213 -0
- openeo_gfmap/fetching/fetching.py +98 -0
- openeo_gfmap/fetching/generic.py +165 -0
- openeo_gfmap/fetching/meteo.py +126 -0
- openeo_gfmap/fetching/s1.py +195 -0
- openeo_gfmap/fetching/s2.py +236 -0
- openeo_gfmap/inference/__init__.py +3 -0
- openeo_gfmap/inference/model_inference.py +347 -0
- openeo_gfmap/manager/__init__.py +31 -0
- openeo_gfmap/manager/job_manager.py +469 -0
- openeo_gfmap/manager/job_splitters.py +144 -0
- openeo_gfmap/metadata.py +24 -0
- openeo_gfmap/preprocessing/__init__.py +22 -0
- openeo_gfmap/preprocessing/cloudmasking.py +268 -0
- openeo_gfmap/preprocessing/compositing.py +74 -0
- openeo_gfmap/preprocessing/interpolation.py +12 -0
- openeo_gfmap/preprocessing/sar.py +64 -0
- openeo_gfmap/preprocessing/scaling.py +65 -0
- openeo_gfmap/preprocessing/udf_cldmask.py +36 -0
- openeo_gfmap/preprocessing/udf_rank.py +37 -0
- openeo_gfmap/preprocessing/udf_score.py +103 -0
- openeo_gfmap/spatial.py +53 -0
- openeo_gfmap/stac/__init__.py +2 -0
- openeo_gfmap/stac/constants.py +51 -0
- openeo_gfmap/temporal.py +22 -0
- openeo_gfmap/utils/__init__.py +23 -0
- openeo_gfmap/utils/build_df.py +48 -0
- openeo_gfmap/utils/catalogue.py +248 -0
- openeo_gfmap/utils/intervals.py +64 -0
- openeo_gfmap/utils/netcdf.py +25 -0
- openeo_gfmap/utils/tile_processing.py +64 -0
- openeo_gfmap-0.1.0.dist-info/METADATA +57 -0
- openeo_gfmap-0.1.0.dist-info/RECORD +40 -0
- openeo_gfmap-0.1.0.dist-info/WHEEL +4 -0
- openeo_gfmap-0.1.0.dist-info/licenses/LICENSE +201 -0
openeo_gfmap/spatial.py
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
""" Definitions of spatial context, either point-based or spatial"""
|
2
|
+
|
3
|
+
from dataclasses import dataclass
|
4
|
+
from typing import Union
|
5
|
+
|
6
|
+
from geojson import GeoJSON
|
7
|
+
from shapely.geometry import Polygon, box
|
8
|
+
|
9
|
+
|
10
|
+
@dataclass
class BoundingBoxExtent:
    """Bounding box definition as accepted by OpenEO.

    Stores the minx/miny/maxx/maxy coordinates expressed as west, south,
    east, north, together with the EPSG code of the reference system
    (WGS84 by default).
    """

    west: float
    south: float
    east: float
    north: float
    epsg: int = 4326

    def __dict__(self):
        # NOTE(review): shadowing `__dict__` with a method is unusual, but it
        # is part of the public contract here and is kept as-is.
        crs_string = f"EPSG:{self.epsg}"
        return {
            "west": self.west,
            "south": self.south,
            "east": self.east,
            "north": self.north,
            "crs": crs_string,
            "srs": crs_string,
        }

    def __iter__(self):
        # Yields (key, value) pairs so that `dict(extent)` keeps working
        # even though `__dict__` is shadowed above.
        crs_string = f"EPSG:{self.epsg}"
        pairs = [
            ("west", self.west),
            ("south", self.south),
            ("east", self.east),
            ("north", self.north),
            ("crs", crs_string),
            ("srs", crs_string),
        ]
        return iter(pairs)

    def to_geometry(self) -> Polygon:
        """Returns the extent as a shapely box polygon."""
        return box(self.west, self.south, self.east, self.north)

    def to_geojson(self) -> GeoJSON:
        """Returns the extent as a GeoJSON-like mapping."""
        return self.to_geometry().__geo_interface__
|
51
|
+
|
52
|
+
|
53
|
+
# Type alias covering all accepted spatial inputs: a GeoJSON mapping, a
# BoundingBoxExtent, or a plain string (presumably a reference the backend
# resolves, e.g. a URL or path — TODO confirm what the str form encodes).
SpatialContext = Union[GeoJSON, BoundingBoxExtent, str]
|
@@ -0,0 +1,51 @@
|
|
1
|
+
"""
|
2
|
+
Constants in the STAC collection generated after a series of batch jobs
|
3
|
+
"""
|
4
|
+
|
5
|
+
import pystac
|
6
|
+
|
7
|
+
LICENSE = "CC-BY-4.0"
|
8
|
+
LICENSE_LINK = pystac.Link(
|
9
|
+
rel="license",
|
10
|
+
target="https://spdx.org/licenses/CC-BY-4.0.html",
|
11
|
+
media_type=pystac.MediaType.HTML,
|
12
|
+
title="Creative Commons Attribution 4.0 International License",
|
13
|
+
)
|
14
|
+
STAC_EXTENSIONS = [
|
15
|
+
"https://stac-extensions.github.io/eo/v1.1.0/schema.json",
|
16
|
+
"https://stac-extensions.github.io/file/v2.1.0/schema.json",
|
17
|
+
"https://stac-extensions.github.io/processing/v1.1.0/schema.json",
|
18
|
+
"https://stac-extensions.github.io/projection/v1.1.0/schema.json",
|
19
|
+
]
|
20
|
+
CONSTELLATION = {
|
21
|
+
"sentinel2": ["sentinel-2"],
|
22
|
+
"sentinel1": ["sentinel-1"],
|
23
|
+
}
|
24
|
+
|
25
|
+
PLATFORM = {
|
26
|
+
"sentinel2": ["sentinel-2a", "sentinel-2b"],
|
27
|
+
"sentinel1": ["sentinel-1a", "sentinel-1b"],
|
28
|
+
}
|
29
|
+
|
30
|
+
INSTRUMENTS = {"sentinel2": ["msi"], "sentinel1": ["c-sar"]}
|
31
|
+
|
32
|
+
GSD = {"sentinel2": [10, 20, 60], "sentinel1": [10]}
|
33
|
+
|
34
|
+
SUMMARIES = {
|
35
|
+
"sentinel2": pystac.summaries.Summaries(
|
36
|
+
{
|
37
|
+
"constellation": CONSTELLATION["sentinel2"],
|
38
|
+
"platform": PLATFORM["sentinel2"],
|
39
|
+
"instruments": INSTRUMENTS["sentinel2"],
|
40
|
+
"gsd": GSD["sentinel2"],
|
41
|
+
}
|
42
|
+
),
|
43
|
+
"sentinel1": pystac.summaries.Summaries(
|
44
|
+
{
|
45
|
+
"constellation": CONSTELLATION["sentinel1"],
|
46
|
+
"platform": PLATFORM["sentinel1"],
|
47
|
+
"instruments": INSTRUMENTS["sentinel1"],
|
48
|
+
"gsd": GSD["sentinel1"],
|
49
|
+
}
|
50
|
+
),
|
51
|
+
}
|
openeo_gfmap/temporal.py
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
""" Definitions of temporal context"""
|
2
|
+
|
3
|
+
from dataclasses import dataclass
|
4
|
+
from datetime import datetime
|
5
|
+
|
6
|
+
|
7
|
+
@dataclass
class TemporalContext:
    """Temporal context defined by a `start_date` and an `end_date`.

    Both values must be encoded as YYYY-mm-dd strings, e.g. 2020-01-01.
    """

    start_date: str
    end_date: str

    def to_datetime(self):
        """Converts the temporal context to a tuple of datetime objects."""
        date_format = "%Y-%m-%d"
        return tuple(
            datetime.strptime(value, date_format)
            for value in (self.start_date, self.end_date)
        )
|
@@ -0,0 +1,23 @@
|
|
1
|
+
"""This sub-module contains utilitary function and tools for OpenEO-GFMap"""
|
2
|
+
|
3
|
+
from openeo_gfmap.utils.build_df import load_json
|
4
|
+
from openeo_gfmap.utils.intervals import quintad_intervals
|
5
|
+
from openeo_gfmap.utils.netcdf import update_nc_attributes
|
6
|
+
from openeo_gfmap.utils.tile_processing import (
|
7
|
+
array_bounds,
|
8
|
+
arrays_cosine_similarity,
|
9
|
+
normalize_array,
|
10
|
+
select_optical_bands,
|
11
|
+
select_sar_bands,
|
12
|
+
)
|
13
|
+
|
14
|
+
__all__ = [
|
15
|
+
"load_json",
|
16
|
+
"normalize_array",
|
17
|
+
"select_optical_bands",
|
18
|
+
"array_bounds",
|
19
|
+
"select_sar_bands",
|
20
|
+
"arrays_cosine_similarity",
|
21
|
+
"quintad_intervals",
|
22
|
+
"update_nc_attributes",
|
23
|
+
]
|
@@ -0,0 +1,48 @@
|
|
1
|
+
"""Utilities to build a `pandas.DataFrame` from the output of a VectorCube
|
2
|
+
based job. Usefull to collect the output of point based extraction.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from pathlib import Path
|
6
|
+
|
7
|
+
import pandas as pd
|
8
|
+
|
9
|
+
VECTORCUBE_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S%z"
TIMESTAMP_FORMAT = "%Y-%m-%d"


def load_json(input_file: Path, bands: list) -> pd.DataFrame:
    """Reads a JSON file exported by a VectorCube-based job and returns it
    as a flat pandas dataframe.

    Parameters
    ----------
    input_file: PathLike
        The path of the JSON file to read.
    bands: list
        The name of the bands that will be used in the column names. The band
        names must be the same as the vector cube that resulted into the parsed
        JSON file.
    Returns
    -------
    df: pd.DataFrame
        A `pandas.DataFrame` containing a combination of the band names and the
        timestamps as column names.
        For example, the Sentinel-2 green band on the 1st October 2020 will
        have the column name `S2-L2A-B02:2020-10-01`
    """

    df = pd.read_json(input_file)

    # read_json parses the vector-cube timestamps into a datetime axis;
    # reformat every column label to a plain YYYY-mm-dd string.
    target_timestamps = list(
        map(lambda date: date.strftime(TIMESTAMP_FORMAT), df.columns.to_pydatetime())
    )
    df = df.rename(dict(zip(df.columns, target_timestamps)), axis=1)

    # Each cell holds a list with one value per band: expand every timestamp
    # column into `len(bands)` columns. Collect the pieces and concatenate
    # once at the end — repeatedly concatenating inside the loop copies the
    # accumulated frame each iteration (quadratic behavior).
    expanded_columns = [
        pd.DataFrame(
            df[col].to_list(), columns=[f"{feature}:{col}" for feature in bands]
        )
        for col in df.columns
    ]

    if not expanded_columns:
        # No timestamp columns at all: keep the old behavior of returning
        # an empty dataframe.
        return pd.DataFrame()

    return pd.concat(expanded_columns, axis=1)
|
@@ -0,0 +1,248 @@
|
|
1
|
+
"""Functionalities to interract with product catalogues."""
|
2
|
+
|
3
|
+
import requests
|
4
|
+
from geojson import GeoJSON
|
5
|
+
from pyproj.crs import CRS
|
6
|
+
from rasterio.warp import transform_bounds
|
7
|
+
from shapely import unary_union
|
8
|
+
from shapely.geometry import box, shape
|
9
|
+
|
10
|
+
from openeo_gfmap import (
|
11
|
+
Backend,
|
12
|
+
BackendContext,
|
13
|
+
BoundingBoxExtent,
|
14
|
+
SpatialContext,
|
15
|
+
TemporalContext,
|
16
|
+
)
|
17
|
+
|
18
|
+
|
19
|
+
class UncoveredS1Exception(Exception):
    """Raised when the Sentinel-1 collection has no products fully covering
    (spatially) a given spatio-temporal context."""
|
24
|
+
|
25
|
+
|
26
|
+
def _parse_cdse_products(response: dict):
|
27
|
+
"""Parses the geometry of products from the CDSE catalogue."""
|
28
|
+
geoemetries = []
|
29
|
+
products = response["features"]
|
30
|
+
|
31
|
+
for product in products:
|
32
|
+
geoemetries.append(shape(product["geometry"]))
|
33
|
+
return geoemetries
|
34
|
+
|
35
|
+
|
36
|
+
def _query_cdse_catalogue(
    collection: str,
    bounds: list,
    temporal_extent: TemporalContext,
    **additional_parameters: dict,
) -> dict:
    """Queries the CDSE resto catalogue and returns the parsed JSON response.

    Parameters
    ----------
    collection : str
        Name of the CDSE collection to query (e.g. "Sentinel1").
    bounds : list
        Bounding box [minx, miny, maxx, maxy], expected in EPSG:4326.
    temporal_extent : TemporalContext
        The temporal period to query (YYYY-mm-dd dates).
    additional_parameters : dict
        Extra query parameters, appended to the URL as "&key=value".

    Returns
    -------
    dict
        Parsed JSON body of the catalogue response.

    Raises
    ------
    Exception
        If the catalogue replies with a non-200 status code.
    """
    minx, miny, maxx, maxy = bounds

    # The date format should be YYYY-MM-DD; the catalogue expects full
    # ISO timestamps.
    start_date = f"{temporal_extent.start_date}T00:00:00Z"
    end_date = f"{temporal_extent.end_date}T00:00:00Z"

    # NOTE(review): maxRecords=100 means only the first 100 products are
    # returned; results may be truncated for large queries — confirm.
    url = (
        f"https://catalogue.dataspace.copernicus.eu/resto/api/collections/"
        f"{collection}/search.json?box={minx},{miny},{maxx},{maxy}"
        f"&sortParam=startDate&maxRecords=100"
        f"&dataset=ESA-DATASET&startDate={start_date}&completionDate={end_date}"
    )
    for key, value in additional_parameters.items():
        url += f"&{key}={value}"

    # A finite timeout prevents the caller from hanging forever when the
    # catalogue is unresponsive (requests has no default timeout).
    response = requests.get(url, timeout=60)

    if response.status_code != 200:
        raise Exception(
            f"Cannot check S1 catalogue on CDSE: Request to {url} failed with "
            f"status code {response.status_code}"
        )

    return response.json()
|
66
|
+
|
67
|
+
|
68
|
+
def _check_cdse_catalogue(
    collection: str,
    bounds: list,
    temporal_extent: TemporalContext,
    **additional_parameters: dict,
) -> bool:
    """Checks if there is at least one product available in the
    given spatio-temporal context for a collection in the CDSE catalogue,
    as there might be issues in the API that sometimes returns empty results
    for a valid query.

    Parameters
    ----------
    collection : str
        The collection name to be checked. (For example: Sentinel1 or Sentinel2)
    bounds : list
        The bounding box [minx, miny, maxx, maxy] to be checked, expected
        in EPSG:4326.
    temporal_extent : TemporalContext
        The temporal period to be checked.
    additional_parameters : Optional[dict], optional
        Additional parameters to be passed to the catalogue, by default empty.
        Parameters (key, value) will be passed as "&key=value" in the query,
        for example: {"sortOrder": "ascending"} will be passed as
        "&sortOrder=ascending".

    Returns
    -------
    True if there is at least one GRD product, False otherwise.
    """
    body = _query_cdse_catalogue(
        collection, bounds, temporal_extent, **additional_parameters
    )

    # `productType` is a plain string from the JSON response, so substring
    # matching must use the `in` operator. The previous
    # `.contains("GRD")` call raised AttributeError (str has no `contains`).
    grd_tiles = [
        feature
        for feature in body["features"]
        if "GRD" in feature["properties"]["productType"]
    ]

    return len(grd_tiles) > 0
|
108
|
+
|
109
|
+
|
110
|
+
def s1_area_per_orbitstate(
    backend: BackendContext,
    spatial_extent: SpatialContext,
    temporal_extent: TemporalContext,
) -> dict:
    """Evaluates for both the ascending and descending state orbits the area of intersection
    between the given spatio-temporal context and the products available in the backend's
    catalogue.

    Parameters
    ----------
    backend : BackendContext
        The backend to be within, as each backend might use different catalogues.
    spatial_extent : SpatialContext
        The spatial extent to be checked, it will check within its bounding box.
    temporal_extent : TemporalContext
        The temporal period to be checked.

    Returns
    ------
    dict
        Keys are the orbit states ("ASCENDING"/"DESCENDING"); each value holds
        "full_overlap" (True when the union of the products covers the whole
        extent) and "area", the summed intersection area. The area is computed
        from EPSG:4326 coordinates, i.e. in square degrees (NOTE(review):
        earlier documentation stated km² — confirm the intended unit).

    Raises
    ------
    NotImplementedError
        When the backend is not backed by the CDSE catalogue.
    """
    if isinstance(spatial_extent, GeoJSON):
        # Transform geojson into shapely geometry and compute bounds
        bounds = shape(spatial_extent).bounds
        # GeoJSON coordinates are WGS84 (EPSG:4326) by specification. Fixes
        # the previous typo `4362`, which is not a valid EPSG code and made
        # this branch fail at the re-projection step below.
        epsg = 4326
    elif isinstance(spatial_extent, BoundingBoxExtent):
        bounds = [
            spatial_extent.west,
            spatial_extent.south,
            spatial_extent.east,
            spatial_extent.north,
        ]
        epsg = spatial_extent.epsg
    else:
        raise ValueError(
            "Provided spatial extent is not a valid GeoJSON or SpatialContext object."
        )
    # Warp the bounds if the epsg is different from 4326
    if epsg != 4326:
        bounds = transform_bounds(CRS.from_epsg(epsg), CRS.from_epsg(4326), *bounds)

    # Queries the products in the catalogues
    if backend.backend in [Backend.CDSE, Backend.CDSE_STAGING, Backend.FED]:
        ascending_products = _parse_cdse_products(
            _query_cdse_catalogue(
                "Sentinel1", bounds, temporal_extent, orbitDirection="ASCENDING"
            )
        )
        descending_products = _parse_cdse_products(
            _query_cdse_catalogue(
                "Sentinel1",
                bounds,
                temporal_extent,
                orbitDirection="DESCENDING",
            )
        )
    else:
        raise NotImplementedError(
            f"This feature is not supported for backend: {backend.backend}."
        )

    # Builds the shape of the spatial extent and computes the area
    spatial_extent = box(*bounds)

    # Computes if there is the full overlap for each of those states
    union_ascending = unary_union(ascending_products)
    union_descending = unary_union(descending_products)

    ascending_covers = union_ascending.contains(spatial_extent)
    descending_covers = union_descending.contains(spatial_extent)

    # Computes the area of intersection
    return {
        "ASCENDING": {
            "full_overlap": ascending_covers,
            "area": sum(
                product.intersection(spatial_extent).area
                for product in ascending_products
            ),
        },
        "DESCENDING": {
            "full_overlap": descending_covers,
            "area": sum(
                product.intersection(spatial_extent).area
                for product in descending_products
            ),
        },
    }
|
201
|
+
|
202
|
+
|
203
|
+
def select_S1_orbitstate(
    backend: BackendContext,
    spatial_extent: SpatialContext,
    temporal_extent: TemporalContext,
) -> str:
    """Selects the Sentinel-1 orbit state that best covers the given
    spatio-temporal context.

    Parameters
    ----------
    backend : BackendContext
        The backend to be within, as each backend might use different catalogues.
    spatial_extent : SpatialContext
        The spatial extent to be checked, it will check within its bounding box.
    temporal_extent : TemporalContext
        The temporal period to be checked.

    Returns
    ------
    str
        The orbit state ("ASCENDING" or "DESCENDING") that covers the most
        area of the given spatio-temporal context.

    Raises
    ------
    UncoveredS1Exception
        When neither orbit state fully covers the requested extent.
    """

    # Query the catalogue coverage per orbit state.
    coverage = s1_area_per_orbitstate(backend, spatial_extent, temporal_extent)

    full_overlaps = {
        state: coverage[state]["full_overlap"]
        for state in ("ASCENDING", "DESCENDING")
    }

    if not any(full_overlaps.values()):
        raise UncoveredS1Exception(
            "No product available to fully cover the given spatio-temporal context."
        )

    if all(full_overlaps.values()):
        # Both states fully cover the extent: prefer the one with the larger
        # intersection area (ties resolve to DESCENDING, as before).
        if coverage["ASCENDING"]["area"] > coverage["DESCENDING"]["area"]:
            return "ASCENDING"
        return "DESCENDING"

    # Exactly one state fully covers the extent.
    return "ASCENDING" if full_overlaps["ASCENDING"] else "DESCENDING"
|
@@ -0,0 +1,64 @@
|
|
1
|
+
"""Utilitary function for intervals, useful for temporal aggregation
|
2
|
+
methods.
|
3
|
+
"""
|
4
|
+
|
5
|
+
from datetime import timedelta
|
6
|
+
|
7
|
+
from openeo_gfmap import TemporalContext
|
8
|
+
|
9
|
+
|
10
|
+
def quintad_intervals(temporal_extent: TemporalContext) -> list:
    """Returns a list of tuples (start_date, end_date) of quintad intervals
    from the input temporal extent. Quintad intervals are intervals of
    generally 5 days, that never overlap two months.

    All months are divided in 6 quintads, where the 6th quintad might
    contain 6 days for months of 31 days.
    For the month of February, the 6th quintad is only of three days, or
    four days for the leap year.

    Dates are returned as strings in the YYYY-mm-dd format.

    NOTE(review): when the requested end date falls inside a 6th quintad
    that gets clamped to the month's end, the last interval may extend past
    `end_date` — confirm this is intended.
    """
    start_date, end_date = temporal_extent.to_datetime()
    quintads = []

    current_date = start_date

    # When not starting on the 1st of a month, rewind `current_date` to the
    # beginning of the quintad containing `start_date`; the offset is applied
    # back to the first interval after the loop so it starts on `start_date`.
    if start_date.day != 1:
        offset = (start_date - timedelta(days=1)).day % 5
        current_date = current_date - timedelta(days=offset)
    else:
        offset = 0

    while current_date <= end_date:
        # Last day of the current month: jump safely into the next month
        # (28th + 4 days always lands there) and subtract that day-of-month.
        last_day = current_date.replace(day=28) + timedelta(days=4)
        last_day = last_day - timedelta(days=last_day.day)

        # Tentative end of the current quintad (5 days long).
        last_quintad = current_date + timedelta(days=4)

        # Extend the 6th quintad to 6 days for 31-day months.
        if last_quintad.day == 30 and last_day.day == 31:
            last_quintad = last_quintad + timedelta(days=1)

        # Quintads never cross a month boundary: clamp to the month's end.
        # This clamp deliberately takes precedence over the end-date clamp
        # below (the elif order matters).
        if last_quintad > last_day:
            last_quintad = last_day
        # Truncate the final quintad at the requested end date.
        elif last_quintad > end_date:
            last_quintad = end_date

        quintads.append((current_date, last_quintad))

        # The next quintad starts the day after the current one ends.
        current_date = last_quintad + timedelta(days=1)

    # Re-apply the offset so the first interval starts on the requested
    # start date rather than on the quintad boundary.
    quintads[0] = (quintads[0][0] + timedelta(days=offset), quintads[0][1])

    # Format back to YYYY-mm-dd strings.
    return [
        (start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"))
        for start_date, end_date in quintads
    ]
|
@@ -0,0 +1,25 @@
|
|
1
|
+
"""Utilities to edit and update netCDF files.
|
2
|
+
"""
|
3
|
+
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import Union
|
6
|
+
|
7
|
+
from netCDF4 import Dataset
|
8
|
+
|
9
|
+
|
10
|
+
def update_nc_attributes(path: Union[str, Path], attributes: dict):
    """
    Creates or updates global attributes of a NetCDF file in place.

    Parameters:
        path (str): Path to the NetCDF file.
        attributes (dict): Dictionary containing attributes to update.
                           Keys are attribute names, values are attribute values.
    """

    with Dataset(path, "r+") as dataset:
        for attr_name, attr_value in attributes.items():
            # New attributes must go through setncattr; existing ones can be
            # assigned directly.
            if attr_name not in dataset.ncattrs():
                dataset.setncattr(attr_name, attr_value)
            else:
                setattr(dataset, attr_name, attr_value)
|
@@ -0,0 +1,64 @@
|
|
1
|
+
"""Utilitaries to process data tiles."""
|
2
|
+
|
3
|
+
import numpy as np
|
4
|
+
import xarray as xr
|
5
|
+
|
6
|
+
|
7
|
+
def normalize_array(inarr: xr.DataArray, percentile: float = 0.99) -> xr.DataArray:
    """Rescales the array between 0.0 and 1.0, using the given percentile as
    the upper bound.
    """
    reduce_dims = ["x", "y", "t"]
    upper = inarr.quantile(percentile, dim=reduce_dims)
    lower = inarr.min(dim=reduce_dims)

    scaled = (inarr - lower) / (upper - lower)

    # Values above the computed percentile are clipped to 1.0.
    return scaled.where(scaled < 1.0, 1.0)
|
18
|
+
|
19
|
+
|
20
|
+
def select_optical_bands(inarr: xr.DataArray) -> xr.DataArray:
    """Keeps only the Sentinel-2 optical bands of the given array."""
    band_names = inarr.coords["bands"].to_numpy()
    optical = [name for name in band_names if name.startswith("S2-L2A-B")]
    return inarr.sel(bands=optical)
|
29
|
+
|
30
|
+
|
31
|
+
def select_sar_bands(inarr: xr.DataArray) -> xr.DataArray:
    """Keeps only the Sentinel-1 SAR bands of the given array."""
    sar_names = ("S1-SIGMA0-VV", "S1-SIGMA0-VH", "S1-SIGMA0-HH", "S1-SIGMA0-HV")
    band_names = inarr.coords["bands"].to_numpy()
    return inarr.sel(bands=[name for name in band_names if name in sar_names])
|
40
|
+
|
41
|
+
|
42
|
+
def array_bounds(inarr: xr.DataArray) -> tuple:
    """Returns (xmin, ymin, xmax, ymax) of the tile's x/y coordinates."""
    xs = inarr.coords["x"]
    ys = inarr.coords["y"]
    return (
        xs.min().item(),
        ys.min().item(),
        xs.max().item(),
        ys.max().item(),
    )
|
50
|
+
|
51
|
+
|
52
|
+
def arrays_cosine_similarity(
    first_array: xr.DataArray, second_array: xr.DataArray
) -> float:
    """Returns a similarity score based on normalized cosine distance. The
    input arrays must have similar ranges to obtain a valid score.
    1.0 represents the best score (same tiles), while 0.0 is the worst score.
    """
    norms_product = np.linalg.norm(first_array) * np.linalg.norm(second_array)
    dot_product = np.sum(first_array * second_array)
    return (dot_product / norms_product).item()
|
@@ -0,0 +1,57 @@
|
|
1
|
+
Metadata-Version: 2.3
|
2
|
+
Name: openeo_gfmap
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: OpenEO General Framework for Mapping
|
5
|
+
Project-URL: Homepage, https://github.com/Open-EO/openeo-gfmap
|
6
|
+
Project-URL: Bug Tracker, https://github.com/Open-EO/openeo-gfmap/issues
|
7
|
+
Author: Darius Couchard, Vincent Verelst, Kristof Van Tricht, Stefaan Lippens, Jeroen Degerickx
|
8
|
+
License-File: LICENSE
|
9
|
+
Classifier: Operating System :: OS Independent
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
11
|
+
Requires-Python: >=3.8
|
12
|
+
Requires-Dist: cftime
|
13
|
+
Requires-Dist: fastparquet
|
14
|
+
Requires-Dist: geojson>=3.0.0
|
15
|
+
Requires-Dist: geopandas
|
16
|
+
Requires-Dist: h3
|
17
|
+
Requires-Dist: h5netcdf>=1.2.0
|
18
|
+
Requires-Dist: netcdf4
|
19
|
+
Requires-Dist: numpy<2.0.0
|
20
|
+
Requires-Dist: onnxruntime
|
21
|
+
Requires-Dist: openeo
|
22
|
+
Requires-Dist: pyarrow
|
23
|
+
Requires-Dist: rasterio
|
24
|
+
Requires-Dist: scipy
|
25
|
+
Provides-Extra: dev
|
26
|
+
Requires-Dist: matplotlib>=3.3.0; extra == 'dev'
|
27
|
+
Requires-Dist: pre-commit; extra == 'dev'
|
28
|
+
Requires-Dist: pytest-depends; extra == 'dev'
|
29
|
+
Requires-Dist: pytest-timeout>=2.2.0; extra == 'dev'
|
30
|
+
Requires-Dist: pytest>=7.4.0; extra == 'dev'
|
31
|
+
Requires-Dist: rioxarray>=0.13.0; extra == 'dev'
|
32
|
+
Requires-Dist: xarray>=2022.3.0; extra == 'dev'
|
33
|
+
Description-Content-Type: text/markdown
|
34
|
+
|
35
|
+
# OpenEO General Framework for Mapping
|
36
|
+
|
37
|
+
openEO GFMap aims to simplify for its users the development of mapping applications through Remote Sensing data by leveraging the power of [OpenEO](https://openeo.org/). This framework is developed primarily for Crop Type mapping and Land Cover Classification, but other applications such as regression tasks can be applied.
|
38
|
+
|
39
|
+
## How is it used?
|
40
|
+
|
41
|
+
In order to use the framework, the user has to specify which kind of input data they expect (satellite, meteo, DEM, ...) and which mode of classification they expect (point-based mapping or polygon-based). The user then specifies two <i>user-defined functions (UDFs)</i>, one for extracting features from the pre-processed data and the other for performing classification through a model.
|
42
|
+
|
43
|
+
The framework provides assistance in the extraction of training data as well as in the inference phase, and makes sure that both training data and inference data are processed the same way before passing through the model. The user is responsible for the machine-learning-related details, and for the training phase itself.
|
44
|
+
|
45
|
+
<p align="center">
|
46
|
+
<img src="./workflow.png">
|
47
|
+
</p>
|
48
|
+
|
49
|
+
## Framework core principles
|
50
|
+
|
51
|
+
1. <b>Backend agnostic</b>: The users shouldn’t have to take care of backend related configurations. The use of OpenEO can vary depending on the backend that is currently in use (for example, the name of data collections). The framework takes care of those differences, while the users only specify the backend they desire to use.
|
52
|
+
|
53
|
+
2. <b>Data consistent</b>: providing a common pipeline for training and for inference. The best way of making sure data is processed the same way during the construction of a training dataset as during inference is to re-use as much code as possible. The users should be able to extract and preprocess training data with the same configuration. OpenEO leaves the possibility to perform data extraction on sparse points/polygons or directly on dense datacubes. This makes it possible to implement a framework that can do both tile-based inference and pixel-based or parcel-based data extraction/preprocessing using the same code.
|
54
|
+
|
55
|
+
3. <b>Easy and Collaborative</b>: Pre-implementing common preprocessing/postprocessing routines. Many operations, such as compositing or linear interpolation, are very common within Remote Sensing applications and should already be implemented in the framework. This will avoid code duplication among the personal code of the framework’s users and encourage collaboration for improvement and optimization of existing techniques.
|
56
|
+
|
57
|
+
4. <b>Performant</b>: Leverage OpenEO processes as much as possible for preprocessing. In the cropclass and worldcereal projects, preprocessing is performed with a combination of OpenEO processes (masking, compositing, linear interpolation) and the implementation of a Feature Extractor within a UDF (computing indices, percentiles). Ideally, OpenEO features should be used as much as possible, while the feature extractor should be as simple as possible, only implementing what is currently not possible within OpenEO.
|