mapchete-eo 2026.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mapchete_eo/__init__.py +1 -0
- mapchete_eo/array/__init__.py +0 -0
- mapchete_eo/array/buffer.py +16 -0
- mapchete_eo/array/color.py +29 -0
- mapchete_eo/array/convert.py +163 -0
- mapchete_eo/base.py +653 -0
- mapchete_eo/blacklist.txt +175 -0
- mapchete_eo/cli/__init__.py +30 -0
- mapchete_eo/cli/bounds.py +22 -0
- mapchete_eo/cli/options_arguments.py +227 -0
- mapchete_eo/cli/s2_brdf.py +77 -0
- mapchete_eo/cli/s2_cat_results.py +130 -0
- mapchete_eo/cli/s2_find_broken_products.py +77 -0
- mapchete_eo/cli/s2_jp2_static_catalog.py +166 -0
- mapchete_eo/cli/s2_mask.py +71 -0
- mapchete_eo/cli/s2_mgrs.py +45 -0
- mapchete_eo/cli/s2_rgb.py +114 -0
- mapchete_eo/cli/s2_verify.py +129 -0
- mapchete_eo/cli/static_catalog.py +82 -0
- mapchete_eo/eostac.py +30 -0
- mapchete_eo/exceptions.py +87 -0
- mapchete_eo/image_operations/__init__.py +12 -0
- mapchete_eo/image_operations/blend_functions.py +579 -0
- mapchete_eo/image_operations/color_correction.py +136 -0
- mapchete_eo/image_operations/compositing.py +266 -0
- mapchete_eo/image_operations/dtype_scale.py +43 -0
- mapchete_eo/image_operations/fillnodata.py +130 -0
- mapchete_eo/image_operations/filters.py +319 -0
- mapchete_eo/image_operations/linear_normalization.py +81 -0
- mapchete_eo/image_operations/sigmoidal.py +114 -0
- mapchete_eo/io/__init__.py +37 -0
- mapchete_eo/io/assets.py +496 -0
- mapchete_eo/io/items.py +162 -0
- mapchete_eo/io/levelled_cubes.py +259 -0
- mapchete_eo/io/path.py +155 -0
- mapchete_eo/io/products.py +423 -0
- mapchete_eo/io/profiles.py +45 -0
- mapchete_eo/platforms/sentinel2/__init__.py +17 -0
- mapchete_eo/platforms/sentinel2/_mapper_registry.py +89 -0
- mapchete_eo/platforms/sentinel2/bandpass_adjustment.py +104 -0
- mapchete_eo/platforms/sentinel2/brdf/__init__.py +8 -0
- mapchete_eo/platforms/sentinel2/brdf/config.py +32 -0
- mapchete_eo/platforms/sentinel2/brdf/correction.py +260 -0
- mapchete_eo/platforms/sentinel2/brdf/hls.py +251 -0
- mapchete_eo/platforms/sentinel2/brdf/models.py +44 -0
- mapchete_eo/platforms/sentinel2/brdf/protocols.py +27 -0
- mapchete_eo/platforms/sentinel2/brdf/ross_thick.py +136 -0
- mapchete_eo/platforms/sentinel2/brdf/sun_angle_arrays.py +76 -0
- mapchete_eo/platforms/sentinel2/config.py +241 -0
- mapchete_eo/platforms/sentinel2/driver.py +43 -0
- mapchete_eo/platforms/sentinel2/masks.py +329 -0
- mapchete_eo/platforms/sentinel2/metadata_parser/__init__.py +6 -0
- mapchete_eo/platforms/sentinel2/metadata_parser/base.py +56 -0
- mapchete_eo/platforms/sentinel2/metadata_parser/default_path_mapper.py +135 -0
- mapchete_eo/platforms/sentinel2/metadata_parser/models.py +78 -0
- mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py +639 -0
- mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py +57 -0
- mapchete_eo/platforms/sentinel2/preconfigured_sources/guessers.py +108 -0
- mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py +171 -0
- mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py +217 -0
- mapchete_eo/platforms/sentinel2/preprocessing_tasks.py +50 -0
- mapchete_eo/platforms/sentinel2/processing_baseline.py +163 -0
- mapchete_eo/platforms/sentinel2/product.py +747 -0
- mapchete_eo/platforms/sentinel2/source.py +114 -0
- mapchete_eo/platforms/sentinel2/types.py +114 -0
- mapchete_eo/processes/__init__.py +0 -0
- mapchete_eo/processes/config.py +51 -0
- mapchete_eo/processes/dtype_scale.py +112 -0
- mapchete_eo/processes/eo_to_xarray.py +19 -0
- mapchete_eo/processes/merge_rasters.py +239 -0
- mapchete_eo/product.py +323 -0
- mapchete_eo/protocols.py +61 -0
- mapchete_eo/search/__init__.py +14 -0
- mapchete_eo/search/base.py +285 -0
- mapchete_eo/search/config.py +113 -0
- mapchete_eo/search/s2_mgrs.py +313 -0
- mapchete_eo/search/stac_search.py +278 -0
- mapchete_eo/search/stac_static.py +197 -0
- mapchete_eo/search/utm_search.py +251 -0
- mapchete_eo/settings.py +25 -0
- mapchete_eo/sort.py +60 -0
- mapchete_eo/source.py +109 -0
- mapchete_eo/time.py +62 -0
- mapchete_eo/types.py +76 -0
- mapchete_eo-2026.2.0.dist-info/METADATA +91 -0
- mapchete_eo-2026.2.0.dist-info/RECORD +89 -0
- mapchete_eo-2026.2.0.dist-info/WHEEL +4 -0
- mapchete_eo-2026.2.0.dist-info/entry_points.txt +11 -0
- mapchete_eo-2026.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
from functools import cached_property
|
|
2
|
+
import logging
|
|
3
|
+
import warnings
|
|
4
|
+
from typing import Any, Dict, Generator, List, Optional, Union
|
|
5
|
+
|
|
6
|
+
from mapchete import Bounds
|
|
7
|
+
from mapchete.types import BoundsLike
|
|
8
|
+
from pystac import Item, Catalog, Collection
|
|
9
|
+
from mapchete.io.vector import bounds_intersect
|
|
10
|
+
from pystac.stac_io import StacIO
|
|
11
|
+
from pystac_client import CollectionClient
|
|
12
|
+
from shapely.geometry import shape
|
|
13
|
+
from shapely.geometry.base import BaseGeometry
|
|
14
|
+
|
|
15
|
+
from mapchete_eo.search.base import (
|
|
16
|
+
CollectionSearcher,
|
|
17
|
+
FSSpecStacIO,
|
|
18
|
+
StaticCollectionWriterMixin,
|
|
19
|
+
filter_items,
|
|
20
|
+
)
|
|
21
|
+
from mapchete_eo.search.config import StacStaticConfig
|
|
22
|
+
from mapchete_eo.time import time_ranges_intersect
|
|
23
|
+
from mapchete_eo.types import TimeRange
|
|
24
|
+
|
|
25
|
+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)


# Import-time side effect: make FSSpecStacIO (from mapchete_eo.search.base) the
# default StacIO implementation used by pystac.
StacIO.set_default(FSSpecStacIO)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class STACStaticCollection(StaticCollectionWriterMixin, CollectionSearcher):
    """
    Searcher which walks a static STAC collection file and its children.
    """

    config_cls = StacStaticConfig

    @cached_property
    def client(self) -> CollectionClient:
        """Collection client read from ``self.collection`` using FSSpecStacIO."""
        collection_href = str(self.collection)
        return CollectionClient.from_file(collection_href, stac_io=FSSpecStacIO())

    @cached_property
    def eo_bands(self) -> List[str]:
        """
        Return the eo:bands definition of this collection.

        Lookup order: the collection's "properties" entry, then the properties
        of the first item found, then the eo:bands entries of that item's
        assets (one entry per band name). An empty list is returned if none of
        these yields a definition.
        """
        collection_bands = self.client.extra_fields.get("properties", {}).get(
            "eo:bands"
        )
        if collection_bands:
            return collection_bands

        warnings.warn(
            "Unable to read eo:bands definition from collection. "
            "Trying now to get information from assets ..."
        )
        try:
            # only the first item found is inspected
            first_item = next(self.client.get_items(recursive=True))

            # see if eo:bands can be found in the item properties
            item_bands = first_item.properties.get("eo:bands")
            if item_bands:
                return item_bands

            # otherwise look through the assets and collect eo:bands by name
            bands_by_name = {
                band["name"]: band
                for asset in first_item.assets.values()
                for band in asset.extra_fields.get("eo:bands", [])
            }
            if bands_by_name:
                return list(bands_by_name.values())
        except StopIteration:
            # the collection does not contain any item
            pass

        logger.debug("cannot find eo:bands definition")
        return []

    def search(
        self,
        time: Optional[Union[TimeRange, List[TimeRange]]] = None,
        bounds: Optional[BoundsLike] = None,
        area: Optional[BaseGeometry] = None,
        query: Optional[str] = None,
        search_kwargs: Optional[Dict[str, Any]] = None,
    ) -> Generator[Item, None, None]:
        """
        Yield items matching time, spatial subset and query.

        If no area is given, bounds (if any) are converted into the search area.
        search_kwargs is accepted but not used by this implementation.
        """
        if area is None and bounds:
            area = shape(Bounds.from_inp(bounds))
        yield from filter_items(self._raw_search(time=time, area=area), query=query)

    def _raw_search(
        self,
        time: Optional[Union[TimeRange, List[TimeRange]]] = None,
        area: Optional[BaseGeometry] = None,
    ) -> Generator[Item, None, None]:
        """
        Yield items intersecting area and time before any query filtering.

        Nothing is yielded for an empty area. Asset hrefs of every yielded item
        are made absolute.
        """
        if area is not None and area.is_empty:
            return

        logger.debug("iterate through children")
        if time:
            # one pass through the collection per requested time range
            time_ranges = time if isinstance(time, list) else [time]
            passes = [dict(area=area, time_range=tr) for tr in time_ranges]
        else:
            # no temporal filter: a single pass restricted by area only
            passes = [dict(area=area)]

        for filter_kwargs in passes:
            for item in _all_intersecting_items(self.client, **filter_kwargs):
                item.make_asset_hrefs_absolute()
                yield item
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _all_intersecting_items(
    collection: Union[Catalog, Collection],
    area: Optional[BaseGeometry],
    time_range: Optional[TimeRange] = None,
) -> Generator[Item, None, None]:
    """
    Recursively yield all items intersecting with area and time range.

    Items directly attached to ``collection`` are checked first. Afterwards every
    child is visited, but only if its declared extent intersects with the
    search parameters.

    Args:
        collection: catalog or collection to start from.
        area: search geometry; None disables the spatial filter.
        time_range: search time range; None disables the temporal filter.
    """
    # collection items
    logger.debug("checking items...")
    for item in collection.get_items():
        # yield item if it intersects with extent
        logger.debug("item %s", item.id)
        if _item_extent_intersects(item, area=area, time_range=time_range):
            logger.debug("item %s within search parameters", item.id)
            yield item

    # collection children
    logger.debug("checking collections...")
    for child in collection.get_children():
        # log the child being inspected (not the parent it was found in)
        logger.debug("collection %s", child.id)
        # only descend into children whose extent intersects
        if _collection_extent_intersects(child, area=area, time_range=time_range):
            logger.debug("found catalog %s with intersecting items", child.id)
            yield from _all_intersecting_items(child, area=area, time_range=time_range)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _item_extent_intersects(
    item: Item,
    area: Optional[BaseGeometry] = None,
    time_range: Optional[TimeRange] = None,
) -> bool:
    """
    Tell whether an item matches the spatial and temporal search parameters.

    The spatial test compares the item geometry with area and is skipped (counts
    as match) if area is falsy. The temporal test is only applied if both
    time_range and the item datetime are set.
    """
    # NOTE: a bounds intersect would be faster but in the current implementation
    # cannot handle item footprints going over the Antimeridian (and have been
    # split up into MultiPolygon geometries), hence the geometry intersect.
    if area:
        footprint = shape(item.geometry)
        spatial_match = footprint.intersects(area)
    else:
        spatial_match = True

    if not (time_range and item.datetime):
        logger.debug("spatial intersect: %s", spatial_match)
        return spatial_match

    # the item datetime is treated as a zero-length time range
    item_range = (item.datetime, item.datetime)
    search_range = (time_range.start, time_range.end)
    temporal_match = time_ranges_intersect(item_range, search_range)
    logger.debug(
        "spatial intersect: %s, temporal intersect: %s",
        spatial_match,
        temporal_match,
    )
    return spatial_match and temporal_match
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _collection_extent_intersects(
    catalog, area: Optional[BaseGeometry] = None, time_range: Optional[TimeRange] = None
) -> bool:
    """
    Check whether the declared extent of a collection matches the search parameters.

    Collection extent items (spatial, temporal) is a list of items, e.g. list of bounds values.

    The spatial test compares the bounds of area with every bbox of the spatial
    extent and is skipped (counts as match) if area is falsy. The temporal test
    compares time_range with every interval of the temporal extent and is only
    applied if time_range is set.
    """

    def _intersects_spatially():
        # NOTE(review): if the extent has no "bbox" key, the default yields one
        # empty bbox which is handed to bounds_intersect — confirm it copes.
        for b in catalog.extent.spatial.to_dict().get("bbox", [[]]):
            if bounds_intersect(area.bounds, b):
                logger.debug("spatial intersect: True")
                return True
        else:
            # loop finished without any intersecting bbox
            logger.debug("spatial intersect: False")
            return False

    def _intersects_temporally():
        # NOTE(review): same default pattern as above for a missing "interval" key.
        for t in catalog.extent.temporal.to_dict().get("interval", [[]]):
            if time_ranges_intersect((time_range.start, time_range.end), t):
                logger.debug("temporal intersect: True")
                return True
        else:
            # loop finished without any intersecting interval
            logger.debug("temporal intersect: False")
            return False

    spatial_intersect = _intersects_spatially() if area else True
    if time_range:
        temporal_intersect = _intersects_temporally()
        logger.debug(
            "spatial intersect: %s, temporal intersect: %s",
            spatial_intersect,
            temporal_intersect,
        )
        return spatial_intersect and temporal_intersect
    else:
        logger.debug("spatial intersect: %s", spatial_intersect)
        return spatial_intersect
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from functools import cached_property
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any, Dict, Generator, List, Optional, Union
|
|
5
|
+
|
|
6
|
+
from mapchete.io.vector import fiona_open
|
|
7
|
+
from mapchete.path import MPath, MPathLike
|
|
8
|
+
from mapchete.types import Bounds, BoundsLike
|
|
9
|
+
from pystac.collection import Collection
|
|
10
|
+
from pystac.item import Item
|
|
11
|
+
from pystac_client import CollectionClient
|
|
12
|
+
from shapely.errors import GEOSException
|
|
13
|
+
from shapely.geometry import shape
|
|
14
|
+
from shapely.geometry.base import BaseGeometry
|
|
15
|
+
|
|
16
|
+
from mapchete_eo.exceptions import ItemGeometryError
|
|
17
|
+
from mapchete_eo.search.base import (
|
|
18
|
+
CollectionSearcher,
|
|
19
|
+
StaticCollectionWriterMixin,
|
|
20
|
+
filter_items,
|
|
21
|
+
)
|
|
22
|
+
from mapchete_eo.search.config import UTMSearchConfig
|
|
23
|
+
from mapchete_eo.search.s2_mgrs import S2Tile, s2_tiles_from_bounds
|
|
24
|
+
from mapchete_eo.time import day_range, to_datetime
|
|
25
|
+
from mapchete_eo.types import TimeRange
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class UTMSearchCatalog(StaticCollectionWriterMixin, CollectionSearcher):
    """
    Search implementation for UTM-grid based catalogs (e.g. Sentinel-2 on AWS).

    Items are located either through a static search index (if
    ``search_index`` is set in the search config) or by listing per-day
    directories below the collection endpoint.
    """

    config_cls = UTMSearchConfig

    @cached_property
    def endpoint(self) -> Optional[str]:
        """Endpoint of the configured collection matching ``self.collection``."""
        # NOTE(review): a trailing ".json" is stripped here, but not in eo_bands
        # and get_collections — confirm which comparison is intended.
        for collection_properties in self.config.sinergise_aws_collections.values():
            if collection_properties["id"] == self.collection.split("/")[-1].replace(
                ".json", ""
            ):
                return collection_properties.get("endpoint")
        return None

    # path schema of the per-day subdirectory below the endpoint
    day_subdir_schema: str = "{year}/{month:02d}/{day:02d}"
    # file name ending identifying the STAC item JSON of a tile
    stac_json_endswith: str = "T{tile_id}.json"

    @cached_property
    def client(self) -> CollectionClient:
        """First collection yielded by get_collections()."""
        return next(self.get_collections())

    @cached_property
    def eo_bands(self) -> List[str]:  # pragma: no cover
        """
        Return the eo:bands summary of the configured collection.

        An empty list is returned if no configured collection matches or the
        matching collection has no eo:bands summary.
        """
        for (
            collection_properties
        ) in UTMSearchConfig().sinergise_aws_collections.values():
            if collection_properties["id"] == self.collection.split("/")[-1]:
                collection = Collection.from_dict(
                    collection_properties["path"].read_json()
                )
                if collection:
                    summary = collection.summaries.to_dict()
                    if "eo:bands" in summary:
                        return summary["eo:bands"]
                else:
                    raise ValueError(f"cannot find collection {collection}")
        else:
            logger.debug(
                "cannot find eo:bands definition from collection %s",
                self.collection,
            )
        return []

    def search(
        self,
        time: Optional[Union[TimeRange, List[TimeRange]]] = None,
        bounds: Optional[BoundsLike] = None,
        area: Optional[BaseGeometry] = None,
        query: Optional[str] = None,
        search_kwargs: Optional[Dict[str, Any]] = None,
    ) -> Generator[Item, None, None]:
        """
        Yield items matching time, spatial subset and query.

        search_kwargs is accepted but not used by this implementation.
        """
        for item in filter_items(
            self._raw_search(
                time=time, bounds=Bounds.from_inp(bounds) if bounds else None, area=area
            ),
            query=query,
        ):
            yield item

    def _raw_search(
        self,
        time: Optional[Union[TimeRange, List[TimeRange]]] = None,
        bounds: Optional[Bounds] = None,
        area: Optional[BaseGeometry] = None,
        config: Optional[UTMSearchConfig] = None,
    ) -> Generator[Item, None, None]:
        """
        Yield non-blacklisted items intersecting area within the time range(s).

        Args:
            time: one or more time ranges; None entries in a list are ignored.
            bounds: search bounds, only used if no area is given.
            area: search geometry; takes precedence over bounds.
            config: search configuration, defaults to a new UTMSearchConfig.

        Raises:
            ValueError: if time is None.
            ItemGeometryError: if an item geometry from the static index cannot
                be resolved.
        """
        config = config or UTMSearchConfig()
        if time is None:
            raise ValueError("time must be given")
        if area is not None and area.is_empty:
            return
        if area is not None:
            bounds = Bounds.from_inp(area)
        elif bounds is not None:
            bounds = Bounds.from_inp(bounds)
            area = shape(bounds)
        # NOTE(review): if neither area nor bounds is given, area stays None and
        # the intersects() calls below fail — confirm callers always pass one.

        # Cleaner time list in case None present as time (undefined)
        time_list: list[TimeRange] = (
            [t for t in time if t is not None] if isinstance(time, list) else [time]
        )
        for time_range in time_list:
            start_time = (
                time_range.start
                if isinstance(time_range.start, datetime.date)
                else datetime.datetime.strptime(time_range.start, "%Y-%m-%d")
            )
            end_time = (
                time_range.end
                if isinstance(time_range.end, datetime.date)
                else datetime.datetime.strptime(time_range.end, "%Y-%m-%d")
            )

            logger.debug(
                "determine items from %s to %s over %s...",
                start_time,
                end_time,
                bounds,
            )
            if config.search_index:
                logger.debug(
                    "use existing search index at %s", str(config.search_index)
                )
                for item in items_from_static_index(
                    bounds=bounds,
                    start_time=start_time,
                    end_time=end_time,
                    index_path=config.search_index,
                ):
                    try:
                        item_path = item.get_self_href()
                        if item_path in self.blacklist:  # pragma: no cover
                            logger.debug(
                                "item %s found in blacklist and skipping", item_path
                            )
                        elif area.intersects(shape(item.geometry)):
                            yield item
                    except GEOSException as exc:
                        # keep the original GEOS error as explicit cause
                        raise ItemGeometryError(
                            f"item {item.get_self_href()} geometry could not be resolved: {str(exc)}"
                        ) from exc

            else:
                logger.debug("using dumb ls directory search at %s", str(self.endpoint))
                # NOTE(review): unlike the index branch, GEOSException is not
                # converted into ItemGeometryError here.
                for item in items_from_directories(
                    bounds=bounds,
                    start_time=start_time,
                    end_time=end_time,
                    endpoint=self.endpoint,
                    day_subdir_schema=self.day_subdir_schema,
                    stac_json_endswith=self.stac_json_endswith,
                ):
                    item_path = item.get_self_href()
                    if item_path in self.blacklist:  # pragma: no cover
                        logger.debug(
                            "item %s found in blacklist and skipping", item_path
                        )
                    elif area.intersects(shape(item.geometry)):
                        yield item

    def get_collections(self):
        """
        Yield transformed collection from:
            https://sentinel-s2-l2a-stac.s3.amazonaws.com/sentinel-s2-l2a.json,
            or https://sentinel-s2-l1c-stac.s3.amazonaws.com/sentinel-s2-l1c.json,
            etc.

        Only configured collections whose id equals the last path element of
        ``self.collection`` are yielded.
        """
        for collection_properties in self.config.sinergise_aws_collections.values():
            collection = Collection.from_dict(collection_properties["path"].read_json())
            if self.collection.split("/")[-1] == collection.id:
                yield collection
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def items_from_static_index(
    bounds: Bounds,
    start_time: Union[datetime.datetime, datetime.date],
    end_time: Union[datetime.datetime, datetime.date],
    index_path: MPathLike,
) -> Generator[Item, None, None]:
    """
    Yield items listed in a static search index within bounds and time range.

    The index at index_path is filtered by bounds; each matching feature points
    (relative to the index location) to a per-tile index, which again is
    filtered by bounds. Items whose timestamp lies within the time range are
    read from the path stored in the tile index.
    """
    index_file = MPath.from_inp(index_path)

    range_start = to_datetime(start_time)
    # add day at end_time to include last day
    range_end = to_datetime(end_time + datetime.timedelta(days=1))

    # open index and determine which S2Tiles are covered
    with fiona_open(index_file) as tile_index:
        for tile_feature in tile_index.filter(bbox=bounds):
            tile_index_file = index_file.parent / tile_feature.properties["path"]
            # look at entries in every S2Tile and match with timestamp
            with fiona_open(tile_index_file) as tile_entries:
                for entry in tile_entries.filter(bbox=bounds):
                    # remove timezone info in order to compare with the range
                    entry_time = to_datetime(entry.properties["datetime"]).replace(
                        tzinfo=None
                    )
                    if not (range_start <= entry_time <= range_end):
                        continue
                    item_json = MPath.from_inp(entry.properties["path"]).read_json()
                    yield Item.from_dict(item_json)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def items_from_directories(
    bounds: Bounds,
    start_time: Union[datetime.datetime, datetime.date],
    end_time: Union[datetime.datetime, datetime.date],
    endpoint: MPathLike,
    day_subdir_schema: str = "{year}/{month:02d}/{day:02d}",
    stac_json_endswith: str = "T{tile_id}.json",
) -> Generator[Item, None, None]:
    """
    Yield items found by listing the per-day directories below endpoint.

    For every day of the time range the day directory (built from
    day_subdir_schema) is searched for STAC item files of the Sentinel-2 tiles
    covering bounds.
    """
    # get Sentinel-2 tiles over given bounds
    tiles = s2_tiles_from_bounds(*bounds)

    # for each day within time range, look for tiles
    for current_day in day_range(start_date=start_time, end_date=end_time):
        day_directory = MPath.from_inp(endpoint) / day_subdir_schema.format(
            year=current_day.year, month=current_day.month, day=current_day.day
        )
        yield from find_items(
            day_directory,
            tiles,
            product_endswith=stac_json_endswith,
        )
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def find_items(
    path: MPath,
    s2_tiles: List[S2Tile],
    product_endswith: str = "T{tile_id}.json",
) -> Generator[Item, None, None]:
    """
    Yield items for all files in path belonging to one of the given tiles.

    A file belongs to a tile if its path ends with product_endswith formatted
    with the tile id.
    """
    # one accepted path ending per tile; str.endswith takes the whole tuple
    accepted_endings = tuple(
        product_endswith.format(tile_id=tile.tile_id) for tile in s2_tiles
    )
    for candidate in path.ls():
        if not candidate.endswith(accepted_endings):
            continue
        yield Item.from_file(candidate)
|
mapchete_eo/settings.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from mapchete.path import MPath, MPathLike
|
|
4
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
5
|
+
from rasterio.crs import CRS
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Settings(BaseSettings):
    """
    Combine default settings with env variables.

    All settings can be set in the environment by adding the 'MAPCHETE_EO_' prefix
    and the settings in uppercase, e.g. MAPCHETE_EO_DEFAULT_CACHE_LOCATION.
    """

    # default location used for caching
    default_cache_location: MPathLike = MPath("s3://eox-mhub-cache/")
    # default CRS of catalogs (EPSG:4326)
    default_catalog_crs: CRS = CRS.from_epsg(4326)
    # optional blacklist location; presumably lists products to skip — TODO confirm
    blacklist: Optional[MPathLike] = None
    lazy_load_stac_items: bool = True

    # read from environment
    model_config = SettingsConfigDict(env_prefix="MAPCHETE_EO_")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Module-level settings instance, created at import time.
mapchete_eo_settings: Settings = Settings()
|
mapchete_eo/sort.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module holds all code required to sort products or slices.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Callable, List, Optional
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel
|
|
8
|
+
|
|
9
|
+
from mapchete_eo.protocols import DateTimeProtocol, GetPropertyProtocol
|
|
10
|
+
from mapchete_eo.time import timedelta, to_datetime
|
|
11
|
+
from mapchete_eo.types import DateTimeLike
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SortMethodConfig(BaseModel):
    """Base configuration of a sort method, holding the sorting function."""

    # callable performing the sort
    func: Callable
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def sort_objects_by_target_date(
    objects: List[DateTimeProtocol],
    target_date: Optional[DateTimeLike] = None,
    reverse: bool = False,
    **kwargs,
) -> List[DateTimeProtocol]:
    """
    Sort objects in place by the distance of their datetime to a target date.

    If no target date is given, the midpoint between the earliest and the
    latest object datetime is used. The sorted input list is returned.
    Additional keyword arguments are accepted and ignored.
    """
    if not objects:
        return objects

    if target_date is not None:
        reference = to_datetime(target_date)
    else:
        # no target given: aim for the middle of the covered time span
        timestamps = [to_datetime(obj.datetime) for obj in objects]
        earliest = min(timestamps)
        latest = max(timestamps)
        reference = earliest + (latest - earliest) / 2

    def _distance_to_reference(obj):
        return timedelta(obj.datetime, reference)

    objects.sort(key=_distance_to_reference, reverse=reverse)

    return objects
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class TargetDateSort(SortMethodConfig):
    """Sort configuration using sort_objects_by_target_date as sorting function."""

    func: Callable = sort_objects_by_target_date
    # presumably forwarded as keyword arguments to func — confirm against caller
    target_date: Optional[DateTimeLike] = None
    reverse: bool = False
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def sort_objects_by_cloud_cover(
    objects: List[GetPropertyProtocol], reverse: bool = False
) -> List[GetPropertyProtocol]:
    """
    Sort objects in place by their "eo:cloud_cover" property.

    The sorted input list is returned; order is ascending unless reverse is set.
    """
    if not objects:  # pragma: no cover
        return objects

    def _cloud_cover(obj):
        return obj.get_property("eo:cloud_cover")

    objects.sort(key=_cloud_cover, reverse=reverse)
    return objects
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class CloudCoverSort(SortMethodConfig):
    """Sort configuration using sort_objects_by_cloud_cover as sorting function."""

    func: Callable = sort_objects_by_cloud_cover
    # presumably forwarded as keyword argument to func — confirm against caller
    reverse: bool = False
|
mapchete_eo/source.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
from functools import cached_property
|
|
2
|
+
from typing import Any, Dict, List, Literal, Optional, Generator, Union, Callable
|
|
3
|
+
|
|
4
|
+
from mapchete.bounds import Bounds
|
|
5
|
+
from mapchete.path import MPath
|
|
6
|
+
from mapchete.types import BoundsLike, CRSLike, MPathLike
|
|
7
|
+
from pydantic import BaseModel, ConfigDict, model_validator
|
|
8
|
+
from pystac import Item
|
|
9
|
+
from shapely.geometry.base import BaseGeometry
|
|
10
|
+
from shapely.errors import GEOSException
|
|
11
|
+
|
|
12
|
+
from mapchete_eo.exceptions import ItemGeometryError
|
|
13
|
+
from mapchete_eo.search.base import CollectionSearcher
|
|
14
|
+
from mapchete_eo.search import STACSearchCollection, STACStaticCollection
|
|
15
|
+
from mapchete_eo.settings import mapchete_eo_settings
|
|
16
|
+
from mapchete_eo.types import TimeRange
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Source(BaseModel):
|
|
20
|
+
"""
|
|
21
|
+
All information required to consume EO products.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
collection: str
|
|
25
|
+
catalog_crs: Optional[CRSLike] = mapchete_eo_settings.default_catalog_crs
|
|
26
|
+
query: Optional[str] = None
|
|
27
|
+
area: Optional[Union[MPathLike, dict, type[BaseGeometry]]] = None
|
|
28
|
+
bounds: Optional[BoundsLike] = None
|
|
29
|
+
|
|
30
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
31
|
+
|
|
32
|
+
@property
|
|
33
|
+
def item_modifier_funcs(self) -> List[Callable]:
|
|
34
|
+
return []
|
|
35
|
+
|
|
36
|
+
@cached_property
|
|
37
|
+
def catalog_type(self) -> Literal["search", "static"]:
|
|
38
|
+
# TODO: stupid test but probably sufficient
|
|
39
|
+
return "static" if self.collection.endswith(".json") else "search"
|
|
40
|
+
|
|
41
|
+
def _spatial_subset(
|
|
42
|
+
self,
|
|
43
|
+
bounds: Optional[BoundsLike] = None,
|
|
44
|
+
area: Optional[BaseGeometry] = None,
|
|
45
|
+
) -> Dict[str, Any]:
|
|
46
|
+
"""Combine bounds and area with bounds defined in Source if any."""
|
|
47
|
+
if self.bounds is None:
|
|
48
|
+
return {"bounds": bounds, "area": area}
|
|
49
|
+
self_bounds = Bounds.from_inp(self.bounds)
|
|
50
|
+
out = dict()
|
|
51
|
+
if bounds is not None:
|
|
52
|
+
bounds = Bounds.from_inp(bounds)
|
|
53
|
+
if bounds.intersects(self_bounds):
|
|
54
|
+
out["bounds"] = Bounds.from_inp(
|
|
55
|
+
bounds.geometry.intersection(self_bounds.geometry)
|
|
56
|
+
)
|
|
57
|
+
if area is not None:
|
|
58
|
+
out["area"] = area.intersection(self_bounds.geometry)
|
|
59
|
+
return out
|
|
60
|
+
|
|
61
|
+
def search(
|
|
62
|
+
self,
|
|
63
|
+
time: Optional[Union[TimeRange, List[TimeRange]]] = None,
|
|
64
|
+
bounds: Optional[BoundsLike] = None,
|
|
65
|
+
area: Optional[BaseGeometry] = None,
|
|
66
|
+
base_dir: Optional[MPathLike] = None,
|
|
67
|
+
) -> Generator[Item, None, None]:
|
|
68
|
+
for item in self.get_catalog(base_dir=base_dir).search(
|
|
69
|
+
time=time,
|
|
70
|
+
query=self.query,
|
|
71
|
+
search_kwargs=dict(query=self.query) if self.query else None,
|
|
72
|
+
**self._spatial_subset(
|
|
73
|
+
bounds=bounds,
|
|
74
|
+
area=area,
|
|
75
|
+
),
|
|
76
|
+
):
|
|
77
|
+
yield self.apply_item_modifier_funcs(item)
|
|
78
|
+
|
|
79
|
+
def apply_item_modifier_funcs(self, item: Item) -> Item:
|
|
80
|
+
try:
|
|
81
|
+
for modifier in self.item_modifier_funcs:
|
|
82
|
+
item = modifier(item)
|
|
83
|
+
except GEOSException as exc:
|
|
84
|
+
raise ItemGeometryError(
|
|
85
|
+
f"item {item.get_self_href()} geometry could not be resolved: {str(exc)}"
|
|
86
|
+
)
|
|
87
|
+
return item
|
|
88
|
+
|
|
89
|
+
def get_catalog(self, base_dir: Optional[MPathLike] = None) -> CollectionSearcher:
|
|
90
|
+
match self.catalog_type:
|
|
91
|
+
case "search":
|
|
92
|
+
return STACSearchCollection(self.collection)
|
|
93
|
+
case "static":
|
|
94
|
+
return STACStaticCollection(
|
|
95
|
+
collection=MPath(self.collection).absolute_path(base_dir=base_dir)
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
def eo_bands(self, base_dir: Optional[MPathLike] = None) -> List[str]:
|
|
99
|
+
return self.get_catalog(base_dir=base_dir).eo_bands
|
|
100
|
+
|
|
101
|
+
@model_validator(mode="before")
|
|
102
|
+
def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
|
103
|
+
if "max_cloud_cover" in values:
|
|
104
|
+
raise DeprecationWarning(
|
|
105
|
+
"'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.",
|
|
106
|
+
)
|
|
107
|
+
elif "area" in values: # pragma: no cover
|
|
108
|
+
raise NotImplementedError("please use 'bounds' as spatial subset for now")
|
|
109
|
+
return values
|