openeo-gfmap 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. openeo_gfmap/__init__.py +23 -0
  2. openeo_gfmap/backend.py +122 -0
  3. openeo_gfmap/features/__init__.py +17 -0
  4. openeo_gfmap/features/feature_extractor.py +389 -0
  5. openeo_gfmap/fetching/__init__.py +21 -0
  6. openeo_gfmap/fetching/commons.py +213 -0
  7. openeo_gfmap/fetching/fetching.py +98 -0
  8. openeo_gfmap/fetching/generic.py +165 -0
  9. openeo_gfmap/fetching/meteo.py +126 -0
  10. openeo_gfmap/fetching/s1.py +195 -0
  11. openeo_gfmap/fetching/s2.py +236 -0
  12. openeo_gfmap/inference/__init__.py +3 -0
  13. openeo_gfmap/inference/model_inference.py +347 -0
  14. openeo_gfmap/manager/__init__.py +31 -0
  15. openeo_gfmap/manager/job_manager.py +469 -0
  16. openeo_gfmap/manager/job_splitters.py +144 -0
  17. openeo_gfmap/metadata.py +24 -0
  18. openeo_gfmap/preprocessing/__init__.py +22 -0
  19. openeo_gfmap/preprocessing/cloudmasking.py +268 -0
  20. openeo_gfmap/preprocessing/compositing.py +74 -0
  21. openeo_gfmap/preprocessing/interpolation.py +12 -0
  22. openeo_gfmap/preprocessing/sar.py +64 -0
  23. openeo_gfmap/preprocessing/scaling.py +65 -0
  24. openeo_gfmap/preprocessing/udf_cldmask.py +36 -0
  25. openeo_gfmap/preprocessing/udf_rank.py +37 -0
  26. openeo_gfmap/preprocessing/udf_score.py +103 -0
  27. openeo_gfmap/spatial.py +53 -0
  28. openeo_gfmap/stac/__init__.py +2 -0
  29. openeo_gfmap/stac/constants.py +51 -0
  30. openeo_gfmap/temporal.py +22 -0
  31. openeo_gfmap/utils/__init__.py +23 -0
  32. openeo_gfmap/utils/build_df.py +48 -0
  33. openeo_gfmap/utils/catalogue.py +248 -0
  34. openeo_gfmap/utils/intervals.py +64 -0
  35. openeo_gfmap/utils/netcdf.py +25 -0
  36. openeo_gfmap/utils/tile_processing.py +64 -0
  37. openeo_gfmap-0.1.0.dist-info/METADATA +57 -0
  38. openeo_gfmap-0.1.0.dist-info/RECORD +40 -0
  39. openeo_gfmap-0.1.0.dist-info/WHEEL +4 -0
  40. openeo_gfmap-0.1.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,53 @@
1
+ """ Definitions of spatial context, either point-based or spatial"""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Union
5
+
6
+ from geojson import GeoJSON
7
+ from shapely.geometry import Polygon, box
8
+
9
+
10
@dataclass
class BoundingBoxExtent:
    """Bounding box definition as accepted by OpenEO.

    Holds the minx, miny, maxx, maxy coordinates expressed as west, south,
    east, north. The EPSG code of the coordinates is also stored
    (defaults to 4326 / WGS84).
    """

    west: float
    south: float
    east: float
    north: float
    epsg: int = 4326

    def __dict__(self):
        # NOTE(review): defining __dict__ as a method shadows the regular
        # instance attribute dictionary (e.g. `vars(obj)` no longer returns
        # a dict) — kept as-is because callers may invoke it directly.
        crs_code = f"EPSG:{self.epsg}"
        return {
            "west": self.west,
            "south": self.south,
            "east": self.east,
            "north": self.north,
            "crs": crs_code,
            "srs": crs_code,
        }

    def __iter__(self):
        # Yield (key, value) pairs so that dict(instance) produces the same
        # mapping as __dict__() above.
        yield "west", self.west
        yield "south", self.south
        yield "east", self.east
        yield "north", self.north
        crs_code = f"EPSG:{self.epsg}"
        yield "crs", crs_code
        yield "srs", crs_code

    def to_geometry(self) -> Polygon:
        """Return the extent as a shapely rectangle polygon."""
        return box(self.west, self.south, self.east, self.north)

    def to_geojson(self) -> GeoJSON:
        """Return the extent as a GeoJSON-style geometry mapping."""
        return self.to_geometry().__geo_interface__


SpatialContext = Union[GeoJSON, BoundingBoxExtent, str]
@@ -0,0 +1,2 @@
1
+ """Definitions of the constants in the STAC collection
2
+ """
@@ -0,0 +1,51 @@
1
+ """
2
+ Constants in the STAC collection generated after a series of batch jobs
3
+ """
4
+
5
+ import pystac
6
+
7
# License identifier (SPDX) applied to the generated STAC collections.
LICENSE = "CC-BY-4.0"
# STAC link object pointing to the full license text.
LICENSE_LINK = pystac.Link(
    rel="license",
    target="https://spdx.org/licenses/CC-BY-4.0.html",
    media_type=pystac.MediaType.HTML,
    title="Creative Commons Attribution 4.0 International License",
)
# STAC extension schemas declared by the generated collections/items.
STAC_EXTENSIONS = [
    "https://stac-extensions.github.io/eo/v1.1.0/schema.json",
    "https://stac-extensions.github.io/file/v2.1.0/schema.json",
    "https://stac-extensions.github.io/processing/v1.1.0/schema.json",
    "https://stac-extensions.github.io/projection/v1.1.0/schema.json",
]
# Collection keyword -> STAC "constellation" summary values.
CONSTELLATION = {
    "sentinel2": ["sentinel-2"],
    "sentinel1": ["sentinel-1"],
}

# Collection keyword -> platform (individual satellite unit) names.
PLATFORM = {
    "sentinel2": ["sentinel-2a", "sentinel-2b"],
    "sentinel1": ["sentinel-1a", "sentinel-1b"],
}

# Collection keyword -> instruments carried by the constellation.
INSTRUMENTS = {"sentinel2": ["msi"], "sentinel1": ["c-sar"]}

# Collection keyword -> available ground sample distances (meters).
GSD = {"sentinel2": [10, 20, 60], "sentinel1": [10]}

# Pre-built pystac Summaries objects, one per supported constellation,
# combining the constants above.
SUMMARIES = {
    "sentinel2": pystac.summaries.Summaries(
        {
            "constellation": CONSTELLATION["sentinel2"],
            "platform": PLATFORM["sentinel2"],
            "instruments": INSTRUMENTS["sentinel2"],
            "gsd": GSD["sentinel2"],
        }
    ),
    "sentinel1": pystac.summaries.Summaries(
        {
            "constellation": CONSTELLATION["sentinel1"],
            "platform": PLATFORM["sentinel1"],
            "instruments": INSTRUMENTS["sentinel1"],
            "gsd": GSD["sentinel1"],
        }
    ),
}
@@ -0,0 +1,22 @@
1
+ """ Definitions of temporal context"""
2
+
3
+ from dataclasses import dataclass
4
+ from datetime import datetime
5
+
6
+
7
@dataclass
class TemporalContext:
    """Temporal context defined by a `start_date` and an `end_date`.

    Both values must be encoded in the YYYY-mm-dd format, e.g. 2020-01-01.
    """

    start_date: str
    end_date: str

    def to_datetime(self):
        """Return the context as a (start, end) tuple of datetime objects."""
        date_format = "%Y-%m-%d"
        start = datetime.strptime(self.start_date, date_format)
        end = datetime.strptime(self.end_date, date_format)
        return (start, end)
@@ -0,0 +1,23 @@
1
+ """This sub-module contains utilitary function and tools for OpenEO-GFMap"""
2
+
3
+ from openeo_gfmap.utils.build_df import load_json
4
+ from openeo_gfmap.utils.intervals import quintad_intervals
5
+ from openeo_gfmap.utils.netcdf import update_nc_attributes
6
+ from openeo_gfmap.utils.tile_processing import (
7
+ array_bounds,
8
+ arrays_cosine_similarity,
9
+ normalize_array,
10
+ select_optical_bands,
11
+ select_sar_bands,
12
+ )
13
+
14
+ __all__ = [
15
+ "load_json",
16
+ "normalize_array",
17
+ "select_optical_bands",
18
+ "array_bounds",
19
+ "select_sar_bands",
20
+ "arrays_cosine_similarity",
21
+ "quintad_intervals",
22
+ "update_nc_attributes",
23
+ ]
@@ -0,0 +1,48 @@
1
+ """Utilities to build a `pandas.DataFrame` from the output of a VectorCube
2
+ based job. Useful to collect the output of point-based extraction.
3
+ """
4
+
5
+ from pathlib import Path
6
+
7
+ import pandas as pd
8
+
9
VECTORCUBE_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S%z"
TIMESTAMP_FORMAT = "%Y-%m-%d"


def load_json(input_file: Path, bands: list) -> pd.DataFrame:
    """Reads a JSON file and outputs it as a proper pandas DataFrame.

    Parameters
    ----------
    input_file: PathLike
        The path of the JSON file to read.
    bands: list
        The name of the bands that will be used in the column names. The band
        names must be the same as in the vector cube that resulted in the
        parsed JSON file.

    Returns
    -------
    df: pd.DataFrame
        A `pandas.DataFrame` containing a combination of the band names and
        the timestamps as column names.
        For example, the Sentinel-2 green band on the 1st October 2020 will
        have the column name `S2-L2A-B02:2020-10-01`
    """

    df = pd.read_json(input_file)

    # pd.read_json converts date-like axis labels into a DatetimeIndex
    # (assumes the vector cube columns are timestamps — TODO confirm for
    # all backends). Re-encode them as plain YYYY-mm-dd strings.
    target_timestamps = [
        timestamp.strftime(TIMESTAMP_FORMAT)
        for timestamp in df.columns.to_pydatetime()
    ]
    df = df.rename(dict(zip(df.columns, target_timestamps)), axis=1)

    # Each cell holds a list with one value per band: expand every column
    # into `len(bands)` columns named "<band>:<timestamp>". Collect the
    # per-column frames first and concatenate once — calling pd.concat
    # inside the loop is quadratic in the number of columns.
    expanded_cols = [
        pd.DataFrame(
            df[col].to_list(), columns=[f"{feature}:{col}" for feature in bands]
        )
        for col in df.columns
    ]
    if not expanded_cols:
        return pd.DataFrame()
    return pd.concat(expanded_cols, axis=1)
@@ -0,0 +1,248 @@
1
+ """Functionalities to interact with product catalogues."""
2
+
3
+ import requests
4
+ from geojson import GeoJSON
5
+ from pyproj.crs import CRS
6
+ from rasterio.warp import transform_bounds
7
+ from shapely import unary_union
8
+ from shapely.geometry import box, shape
9
+
10
+ from openeo_gfmap import (
11
+ Backend,
12
+ BackendContext,
13
+ BoundingBoxExtent,
14
+ SpatialContext,
15
+ TemporalContext,
16
+ )
17
+
18
+
19
class UncoveredS1Exception(Exception):
    """Exception raised when there is no product available to fully cover spatially a given
    spatio-temporal context for the Sentinel-1 collection."""

    pass
24
+
25
+
26
def _parse_cdse_products(response: dict):
    """Extract the product geometries from a CDSE catalogue response.

    Returns a list of shapely geometries, one per product feature, in the
    order they appear in the response.
    """
    return [shape(feature["geometry"]) for feature in response["features"]]
34
+
35
+
36
def _query_cdse_catalogue(
    collection: str,
    bounds: list,
    temporal_extent: TemporalContext,
    **additional_parameters: dict,
) -> dict:
    """Queries the CDSE resto catalogue for products intersecting the given
    spatio-temporal context.

    Parameters
    ----------
    collection : str
        The collection name to query (for example: Sentinel1 or Sentinel2).
    bounds : list
        The (minx, miny, maxx, maxy) bounds, expressed in EPSG:4326.
    temporal_extent : TemporalContext
        The temporal period to query.
    additional_parameters : dict
        Extra query parameters, appended to the URL as "&key=value".

    Returns
    -------
    dict
        The parsed JSON body of the catalogue response.

    Raises
    ------
    Exception
        If the catalogue request does not return a 200 status code.
    """
    minx, miny, maxx, maxy = bounds

    # The dates in the temporal extent are YYYY-MM-DD. The end date is
    # extended to the end of its day so products acquired on the last day
    # of an (inclusive) extent are returned; with T00:00:00Z the whole
    # final day was silently excluded from the query.
    start_date = f"{temporal_extent.start_date}T00:00:00Z"
    end_date = f"{temporal_extent.end_date}T23:59:59Z"

    url = (
        f"https://catalogue.dataspace.copernicus.eu/resto/api/collections/"
        f"{collection}/search.json?box={minx},{miny},{maxx},{maxy}"
        f"&sortParam=startDate&maxRecords=100"
        f"&dataset=ESA-DATASET&startDate={start_date}&completionDate={end_date}"
    )
    for key, value in additional_parameters.items():
        url += f"&{key}={value}"

    # Explicit timeout so a stuck catalogue endpoint cannot hang the caller
    # forever (requests has no default timeout).
    response = requests.get(url, timeout=180)

    if response.status_code != 200:
        raise Exception(
            f"Cannot check S1 catalogue on CDSE: Request to {url} failed with "
            f"status code {response.status_code}"
        )

    return response.json()
66
+
67
+
68
def _check_cdse_catalogue(
    collection: str,
    bounds: list,
    temporal_extent: TemporalContext,
    **additional_parameters: dict,
) -> bool:
    """Checks if there is at least one GRD product available in the
    given spatio-temporal context for a collection in the CDSE catalogue,
    as there might be issues in the API that sometimes returns empty results
    for a valid query.

    Parameters
    ----------
    collection : str
        The collection name to be checked. (For example: Sentinel1 or Sentinel2)
    bounds : list
        The (minx, miny, maxx, maxy) bounds to be checked, in EPSG:4326.
    temporal_extent : TemporalContext
        The temporal period to be checked.
    additional_parameters : Optional[dict], optional
        Additional parameters to be passed to the catalogue, by default empty.
        Parameters (key, value) will be passed as "&key=value" in the query,
        for example: {"sortOrder": "ascending"} will be passed as
        "&sortOrder=ascending"

    Returns
    -------
    True if there is at least one product, False otherwise.
    """
    body = _query_cdse_catalogue(
        collection, bounds, temporal_extent, **additional_parameters
    )

    # `productType` is a plain string: test for the GRD substring with the
    # `in` operator. The previous `.contains("GRD")` call raised
    # AttributeError on every feature, since str has no `contains` method.
    grd_tiles = [
        feature
        for feature in body["features"]
        if "GRD" in feature["properties"]["productType"]
    ]

    return len(grd_tiles) > 0
108
+
109
+
110
def s1_area_per_orbitstate(
    backend: BackendContext,
    spatial_extent: SpatialContext,
    temporal_extent: TemporalContext,
) -> dict:
    """Evaluates for both the ascending and descending state orbits the area of intersection
    between the given spatio-temporal context and the products available in the backend's
    catalogue.

    Parameters
    ----------
    backend : BackendContext
        The backend to be within, as each backend might use different catalogues.
    spatial_extent : SpatialContext
        The spatial extent to be checked, it will check within its bounding box.
    temporal_extent : TemporalContext
        The temporal period to be checked.

    Returns
    ------
    dict
        Keys containing the orbit state and values containing the total area of intersection.
        NOTE(review): the area is computed in the units of EPSG:4326 (square
        degrees), not km^2 as previously documented — confirm with callers.

    Raises
    ------
    ValueError
        If the spatial extent is neither a GeoJSON nor a BoundingBoxExtent.
    NotImplementedError
        If the backend does not support this catalogue query.
    """
    if isinstance(spatial_extent, GeoJSON):
        # Transform geojson into shapely geometry and compute bounds
        bounds = shape(spatial_extent).bounds
        # GeoJSON coordinates are always expressed in EPSG:4326 (RFC 7946).
        # Previously this was the typo 4362, which forced a spurious (and
        # wrong) reprojection of the bounds below.
        epsg = 4326
    elif isinstance(spatial_extent, BoundingBoxExtent):
        bounds = [
            spatial_extent.west,
            spatial_extent.south,
            spatial_extent.east,
            spatial_extent.north,
        ]
        epsg = spatial_extent.epsg
    else:
        raise ValueError(
            "Provided spatial extent is not a valid GeoJSON or SpatialContext object."
        )
    # Warp the bounds if the epsg is different from 4326
    if epsg != 4326:
        bounds = transform_bounds(CRS.from_epsg(epsg), CRS.from_epsg(4326), *bounds)

    # Queries the products in the catalogues
    if backend.backend in [Backend.CDSE, Backend.CDSE_STAGING, Backend.FED]:
        ascending_products = _parse_cdse_products(
            _query_cdse_catalogue(
                "Sentinel1", bounds, temporal_extent, orbitDirection="ASCENDING"
            )
        )
        descending_products = _parse_cdse_products(
            _query_cdse_catalogue(
                "Sentinel1",
                bounds,
                temporal_extent,
                orbitDirection="DESCENDING",
            )
        )
    else:
        raise NotImplementedError(
            f"This feature is not supported for backend: {backend.backend}."
        )

    # Builds the shape of the spatial extent and computes the area
    spatial_extent = box(*bounds)

    # Computes if there is the full overlap for each of those states
    union_ascending = unary_union(ascending_products)
    union_descending = unary_union(descending_products)

    ascending_covers = union_ascending.contains(spatial_extent)
    descending_covers = union_descending.contains(spatial_extent)

    # Computes the area of intersection
    return {
        "ASCENDING": {
            "full_overlap": ascending_covers,
            "area": sum(
                product.intersection(spatial_extent).area
                for product in ascending_products
            ),
        },
        "DESCENDING": {
            "full_overlap": descending_covers,
            "area": sum(
                product.intersection(spatial_extent).area
                for product in descending_products
            ),
        },
    }
201
+
202
+
203
def select_S1_orbitstate(
    backend: BackendContext,
    spatial_extent: SpatialContext,
    temporal_extent: TemporalContext,
) -> str:
    """Selects the orbit state that covers the most area of the given spatio-temporal context
    for the Sentinel-1 collection.

    Parameters
    ----------
    backend : BackendContext
        The backend to be within, as each backend might use different catalogues.
    spatial_extent : SpatialContext
        The spatial extent to be checked, it will check within its bounding box.
    temporal_extent : TemporalContext
        The temporal period to be checked.

    Returns
    ------
    str
        The orbit state that covers the most area of the given spatio-temporal context

    Raises
    ------
    UncoveredS1Exception
        If neither orbit state fully covers the requested extent.
    """

    # Query the catalogue once and inspect the coverage of both orbit states.
    areas = s1_area_per_orbitstate(backend, spatial_extent, temporal_extent)

    fully_covering = [
        state
        for state in ("ASCENDING", "DESCENDING")
        if areas[state]["full_overlap"]
    ]

    if len(fully_covering) == 1:
        # Exactly one orbit state covers the whole extent: no ambiguity.
        return fully_covering[0]

    if len(fully_covering) == 2:
        # Both states fully cover the extent: keep the one with the larger
        # total intersection area (ties resolve to DESCENDING).
        if areas["ASCENDING"]["area"] > areas["DESCENDING"]["area"]:
            return "ASCENDING"
        return "DESCENDING"

    raise UncoveredS1Exception(
        "No product available to fully cover the given spatio-temporal context."
    )
@@ -0,0 +1,64 @@
1
+ """Utilitary function for intervals, useful for temporal aggregation
2
+ methods.
3
+ """
4
+
5
+ from datetime import timedelta
6
+
7
+ from openeo_gfmap import TemporalContext
8
+
9
+
10
def quintad_intervals(temporal_extent: TemporalContext) -> list:
    """Returns a list of tuples (start_date, end_date) of quintad intervals
    from the input temporal extent. Quintad intervals are intervals of
    generally 5 days, that never overlap two months.

    All months are divided in 6 quintads, where the 6th quintad might
    contain 6 days for months of 31 days.
    For the month of February, the 6th quintad is only of three days, or
    four days for the leap year.

    Parameters
    ----------
    temporal_extent : TemporalContext
        The period to split, with YYYY-mm-dd string dates.

    Returns
    -------
    list
        Tuples of (start_date, end_date) strings in YYYY-mm-dd format.
    """
    start_date, end_date = temporal_extent.to_datetime()
    quintads = []

    current_date = start_date

    # If the extent starts in the middle of a quintad, rewind to that
    # quintad's first day so the grid stays aligned to the month; the
    # offset is re-applied to the first interval after the loop.
    if start_date.day != 1:
        offset = (start_date - timedelta(days=1)).day % 5
        current_date = current_date - timedelta(days=offset)
    else:
        offset = 0

    while current_date <= end_date:
        # Last day of the current month: jump safely into the next month
        # from day 28, then subtract the day-of-month to land on the final
        # day of the current one.
        last_day = current_date.replace(day=28) + timedelta(days=4)
        last_day = last_day - timedelta(days=last_day.day)

        # Get the last day of the current quintad
        last_quintad = current_date + timedelta(days=4)

        # The 6th quintad absorbs the 31st day in 31-day months so that
        # quintads never spill into the next month.
        if last_quintad.day == 30 and last_day.day == 31:
            last_quintad = last_quintad + timedelta(days=1)

        # If the last quintad is after the last day of the month, then
        # set it to the last day of the month
        if last_quintad > last_day:
            last_quintad = last_day
        # In the case the last quintad is after the end date, then set it to the end date
        elif last_quintad > end_date:
            last_quintad = end_date

        quintads.append((current_date, last_quintad))

        # Set the current date to the next quintad
        current_date = last_quintad + timedelta(days=1)

    # Fixing the offset issue for intervals starting in the middle of a quintad
    quintads[0] = (quintads[0][0] + timedelta(days=offset), quintads[0][1])

    # Returns to string with the YYYY-mm-dd format
    return [
        (start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"))
        for start_date, end_date in quintads
    ]
@@ -0,0 +1,25 @@
1
+ """Utilities to edit and update netCDF files.
2
+ """
3
+
4
+ from pathlib import Path
5
+ from typing import Union
6
+
7
+ from netCDF4 import Dataset
8
+
9
+
10
def update_nc_attributes(path: Union[str, Path], attributes: dict):
    """
    Update global attributes of a NetCDF file in place.

    Parameters:
        path (str): Path to the NetCDF file.
        attributes (dict): Dictionary containing attributes to update.
            Keys are attribute names, values are attribute values.
    """

    # "r+" opens the dataset for in-place modification; the context manager
    # guarantees it is closed (and changes flushed) on exit.
    with Dataset(path, "r+") as nc:
        for name, value in attributes.items():
            if name in nc.ncattrs():
                # Attribute already exists: overwrite it.
                setattr(nc, name, value)
            else:
                # NOTE(review): setattr and setncattr appear equivalent for
                # netCDF4 Datasets, making this branch look redundant —
                # confirm before simplifying.
                nc.setncattr(name, value)
@@ -0,0 +1,64 @@
1
+ """Utilitaries to process data tiles."""
2
+
3
+ import numpy as np
4
+ import xarray as xr
5
+
6
+
7
def normalize_array(inarr: xr.DataArray, percentile: float = 0.99) -> xr.DataArray:
    """Scale the array into the [0.0, 1.0] range.

    The lower bound is the global minimum and the upper bound the given
    percentile, both computed over the x, y and t dimensions.
    """
    reduce_dims = ["x", "y", "t"]
    upper = inarr.quantile(percentile, dim=reduce_dims)
    lower = inarr.min(dim=reduce_dims)

    scaled = (inarr - lower) / (upper - lower)

    # Values above the chosen percentile scale past 1.0: clip them to 1.0.
    return scaled.where(scaled < 1.0, 1.0)
18
+
19
+
20
def select_optical_bands(inarr: xr.DataArray) -> xr.DataArray:
    """Keep only the optical (Sentinel-2 L2A) bands of the given array."""
    band_names = inarr.coords["bands"].to_numpy()
    optical = [name for name in band_names if name.startswith("S2-L2A-B")]
    return inarr.sel(bands=optical)
29
+
30
+
31
def select_sar_bands(inarr: xr.DataArray) -> xr.DataArray:
    """Keep only the SAR (Sentinel-1 sigma0) bands of the given array."""
    sar_names = {"S1-SIGMA0-VV", "S1-SIGMA0-VH", "S1-SIGMA0-HH", "S1-SIGMA0-HV"}
    band_names = inarr.coords["bands"].to_numpy()
    return inarr.sel(bands=[name for name in band_names if name in sar_names])
40
+
41
+
42
def array_bounds(inarr: xr.DataArray) -> tuple:
    """Return the (xmin, ymin, xmax, ymax) bounds of the tile coordinates."""
    x_coords = inarr.coords["x"]
    y_coords = inarr.coords["y"]
    return (
        x_coords.min().item(),
        y_coords.min().item(),
        x_coords.max().item(),
        y_coords.max().item(),
    )
50
+
51
+
52
def arrays_cosine_similarity(
    first_array: xr.DataArray, second_array: xr.DataArray
) -> float:
    """Returns a similarity score based on normalized cosine distance. The
    input arrays must have similar ranges to obtain a valid score.
    1.0 represents the best score (same tiles), while 0.0 is the worst score.
    """
    numerator = np.sum(first_array * second_array)
    denominator = np.linalg.norm(first_array) * np.linalg.norm(second_array)
    return (numerator / denominator).item()
@@ -0,0 +1,57 @@
1
+ Metadata-Version: 2.3
2
+ Name: openeo_gfmap
3
+ Version: 0.1.0
4
+ Summary: OpenEO General Framework for Mapping
5
+ Project-URL: Homepage, https://github.com/Open-EO/openeo-gfmap
6
+ Project-URL: Bug Tracker, https://github.com/Open-EO/openeo-gfmap/issues
7
+ Author: Darius Couchard, Vincent Verelst, Kristof Van Tricht, Stefaan Lippens, Jeroen Degerickx
8
+ License-File: LICENSE
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Programming Language :: Python :: 3
11
+ Requires-Python: >=3.8
12
+ Requires-Dist: cftime
13
+ Requires-Dist: fastparquet
14
+ Requires-Dist: geojson>=3.0.0
15
+ Requires-Dist: geopandas
16
+ Requires-Dist: h3
17
+ Requires-Dist: h5netcdf>=1.2.0
18
+ Requires-Dist: netcdf4
19
+ Requires-Dist: numpy<2.0.0
20
+ Requires-Dist: onnxruntime
21
+ Requires-Dist: openeo
22
+ Requires-Dist: pyarrow
23
+ Requires-Dist: rasterio
24
+ Requires-Dist: scipy
25
+ Provides-Extra: dev
26
+ Requires-Dist: matplotlib>=3.3.0; extra == 'dev'
27
+ Requires-Dist: pre-commit; extra == 'dev'
28
+ Requires-Dist: pytest-depends; extra == 'dev'
29
+ Requires-Dist: pytest-timeout>=2.2.0; extra == 'dev'
30
+ Requires-Dist: pytest>=7.4.0; extra == 'dev'
31
+ Requires-Dist: rioxarray>=0.13.0; extra == 'dev'
32
+ Requires-Dist: xarray>=2022.3.0; extra == 'dev'
33
+ Description-Content-Type: text/markdown
34
+
35
+ # OpenEO General Framework for Mapping
36
+
37
+ openEO GFMap aims to simplify for its users the development of mapping applications through Remote Sensing data by leveraging the power of [OpenEO](https://openeo.org/). This framework is developed primarily for Crop Type mapping and Land Cover Classification, but other applications such as regression tasks can be applied.
38
+
39
+ ## How is it used?
40
+
41
+ In order to be used, the user has to specify which kind of input data they expect (satellite, meteo, DEM, ...) and which mode of classification they expect (point based mapping or polygon based). The user then specifies two <i>user defined files (UDF)</i>, one for extracting features from the pre-processed data and the other for performing classification through a model.
42
+
43
+ The framework provides assistance in the extraction of training data as well as in the inference phase, and makes sure that both training data and inference data are processed the same way before passing through the model. The user is responsible for the machine learning related details, and for the training phase itself.
44
+
45
+ <p align="center">
46
+ <img src="./workflow.png">
47
+ </p>
48
+
49
+ ## Framework core principles
50
+
51
+ 1. <b>Backend agnostic</b>: The users shouldn’t have to take care of backend related configurations. The use of OpenEO can vary depending on the backend that is currently in use (for example, the name of data collections). The framework takes care of those differences, while the users only specify the backend they desire to use.
52
+
53
+ 2. <b> Data consistent</b>: providing a common pipeline for training and for inference. The best way of making sure data is processed the same way during the construction of a training dataset as during inference is to re-use as much code as possible. The users should be able to extract and preprocess training data with the same configuration. OpenEO leaves the possibility to perform data extraction on sparse points/polygons or directly on dense datacubes. This leaves the possibility to implement a framework that could do both tile-based inference and pixel-based or parcel-based data extraction/preprocessing using the same code.
54
+
55
+ 3. <b>Easy and Collaborative</b>: Pre-implementing common preprocessing/postprocessing routines. Many operations, such as compositing or linear interpolation, are very common within Remote Sensing applications and should already be implemented in the framework. This will avoid code duplication among the personal code of the framework’s users and encourage collaboration for improvement and optimization of existing techniques.
56
+
57
+ 4. <b>Performant</b>: Leverage OpenEO processes as much as possible for preprocessing. In the cropclass and worldcereal projects, preprocessing is performed with a combination of OpenEO processes (masking, compositing, linear interpolation) and the implementation of a Feature Extractor within an UDF (computing indices, percentiles). Ideally, OpenEO features should be used as much as possible, while the features extractor should be as simple as possible, only implementing what is currently not possible within OpenEO.