mapchete-eo 2025.7.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mapchete_eo/__init__.py +1 -0
- mapchete_eo/archives/__init__.py +0 -0
- mapchete_eo/archives/base.py +65 -0
- mapchete_eo/array/__init__.py +0 -0
- mapchete_eo/array/buffer.py +16 -0
- mapchete_eo/array/color.py +29 -0
- mapchete_eo/array/convert.py +157 -0
- mapchete_eo/base.py +528 -0
- mapchete_eo/blacklist.txt +175 -0
- mapchete_eo/cli/__init__.py +30 -0
- mapchete_eo/cli/bounds.py +22 -0
- mapchete_eo/cli/options_arguments.py +243 -0
- mapchete_eo/cli/s2_brdf.py +77 -0
- mapchete_eo/cli/s2_cat_results.py +146 -0
- mapchete_eo/cli/s2_find_broken_products.py +93 -0
- mapchete_eo/cli/s2_jp2_static_catalog.py +166 -0
- mapchete_eo/cli/s2_mask.py +71 -0
- mapchete_eo/cli/s2_mgrs.py +45 -0
- mapchete_eo/cli/s2_rgb.py +114 -0
- mapchete_eo/cli/s2_verify.py +129 -0
- mapchete_eo/cli/static_catalog.py +123 -0
- mapchete_eo/eostac.py +30 -0
- mapchete_eo/exceptions.py +87 -0
- mapchete_eo/geometry.py +271 -0
- mapchete_eo/image_operations/__init__.py +12 -0
- mapchete_eo/image_operations/color_correction.py +136 -0
- mapchete_eo/image_operations/compositing.py +247 -0
- mapchete_eo/image_operations/dtype_scale.py +43 -0
- mapchete_eo/image_operations/fillnodata.py +130 -0
- mapchete_eo/image_operations/filters.py +319 -0
- mapchete_eo/image_operations/linear_normalization.py +81 -0
- mapchete_eo/image_operations/sigmoidal.py +114 -0
- mapchete_eo/io/__init__.py +37 -0
- mapchete_eo/io/assets.py +492 -0
- mapchete_eo/io/items.py +147 -0
- mapchete_eo/io/levelled_cubes.py +228 -0
- mapchete_eo/io/path.py +144 -0
- mapchete_eo/io/products.py +413 -0
- mapchete_eo/io/profiles.py +45 -0
- mapchete_eo/known_catalogs.py +42 -0
- mapchete_eo/platforms/sentinel2/__init__.py +17 -0
- mapchete_eo/platforms/sentinel2/archives.py +190 -0
- mapchete_eo/platforms/sentinel2/bandpass_adjustment.py +104 -0
- mapchete_eo/platforms/sentinel2/brdf/__init__.py +8 -0
- mapchete_eo/platforms/sentinel2/brdf/config.py +32 -0
- mapchete_eo/platforms/sentinel2/brdf/correction.py +260 -0
- mapchete_eo/platforms/sentinel2/brdf/hls.py +251 -0
- mapchete_eo/platforms/sentinel2/brdf/models.py +44 -0
- mapchete_eo/platforms/sentinel2/brdf/protocols.py +27 -0
- mapchete_eo/platforms/sentinel2/brdf/ross_thick.py +136 -0
- mapchete_eo/platforms/sentinel2/brdf/sun_angle_arrays.py +76 -0
- mapchete_eo/platforms/sentinel2/config.py +181 -0
- mapchete_eo/platforms/sentinel2/driver.py +78 -0
- mapchete_eo/platforms/sentinel2/masks.py +325 -0
- mapchete_eo/platforms/sentinel2/metadata_parser.py +734 -0
- mapchete_eo/platforms/sentinel2/path_mappers/__init__.py +29 -0
- mapchete_eo/platforms/sentinel2/path_mappers/base.py +56 -0
- mapchete_eo/platforms/sentinel2/path_mappers/earthsearch.py +34 -0
- mapchete_eo/platforms/sentinel2/path_mappers/metadata_xml.py +135 -0
- mapchete_eo/platforms/sentinel2/path_mappers/sinergise.py +105 -0
- mapchete_eo/platforms/sentinel2/preprocessing_tasks.py +26 -0
- mapchete_eo/platforms/sentinel2/processing_baseline.py +160 -0
- mapchete_eo/platforms/sentinel2/product.py +669 -0
- mapchete_eo/platforms/sentinel2/types.py +109 -0
- mapchete_eo/processes/__init__.py +0 -0
- mapchete_eo/processes/config.py +51 -0
- mapchete_eo/processes/dtype_scale.py +112 -0
- mapchete_eo/processes/eo_to_xarray.py +19 -0
- mapchete_eo/processes/merge_rasters.py +235 -0
- mapchete_eo/product.py +278 -0
- mapchete_eo/protocols.py +56 -0
- mapchete_eo/search/__init__.py +14 -0
- mapchete_eo/search/base.py +222 -0
- mapchete_eo/search/config.py +42 -0
- mapchete_eo/search/s2_mgrs.py +314 -0
- mapchete_eo/search/stac_search.py +251 -0
- mapchete_eo/search/stac_static.py +236 -0
- mapchete_eo/search/utm_search.py +251 -0
- mapchete_eo/settings.py +24 -0
- mapchete_eo/sort.py +48 -0
- mapchete_eo/time.py +53 -0
- mapchete_eo/types.py +73 -0
- mapchete_eo-2025.7.0.dist-info/METADATA +38 -0
- mapchete_eo-2025.7.0.dist-info/RECORD +87 -0
- mapchete_eo-2025.7.0.dist-info/WHEEL +5 -0
- mapchete_eo-2025.7.0.dist-info/entry_points.txt +11 -0
- mapchete_eo-2025.7.0.dist-info/licenses/LICENSE +21 -0
mapchete_eo/product.py
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any, List, Literal, Optional, Set
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import numpy.ma as ma
|
|
8
|
+
import pystac
|
|
9
|
+
import xarray as xr
|
|
10
|
+
from mapchete import Timer
|
|
11
|
+
from mapchete.io.raster import ReferencedRaster
|
|
12
|
+
from mapchete.path import MPath, MPathLike
|
|
13
|
+
from mapchete.protocols import GridProtocol
|
|
14
|
+
from mapchete.types import Bounds, NodataVals
|
|
15
|
+
from numpy.typing import DTypeLike
|
|
16
|
+
from rasterio.enums import Resampling
|
|
17
|
+
from shapely.geometry import shape
|
|
18
|
+
|
|
19
|
+
from mapchete_eo.array.convert import to_dataarray
|
|
20
|
+
from mapchete_eo.io import get_item_property, item_to_np_array
|
|
21
|
+
from mapchete_eo.protocols import EOProductProtocol
|
|
22
|
+
from mapchete_eo.settings import mapchete_eo_settings
|
|
23
|
+
from mapchete_eo.types import BandLocation
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class EOProduct(EOProductProtocol):
    """Wrapper class around a pystac.Item which provides read functions.

    The STAC item is kept as a plain dictionary in ``item_dict``; the ``item``
    property rebuilds a ``pystac.Item`` from that dictionary on every access.
    """

    default_dtype: DTypeLike = np.uint16

    def __init__(self, item: pystac.Item):
        """Store the item as a dictionary and derive geometry, bounds and CRS."""
        self.item_dict = item.to_dict()
        self.__geo_interface__ = self.item.geometry
        self.bounds = Bounds.from_inp(shape(self))
        self.crs = mapchete_eo_settings.default_catalog_crs

    def __repr__(self):
        return f"<EOProduct product_id={self.item.id}>"

    def clear_cached_data(self):
        """Do nothing; this base class does not hold cached data."""
        pass

    @property
    def item(self) -> pystac.Item:
        """Return a ``pystac.Item`` freshly built from ``item_dict``."""
        return pystac.Item.from_dict(self.item_dict)

    @classmethod
    def from_stac_item(self, item: pystac.Item, **kwargs) -> EOProduct:
        """Create a product from a STAC item.

        Additional keyword arguments are accepted but ignored, and the result
        is always a plain ``EOProduct`` instance.
        """
        return EOProduct(item)

    def get_mask(self) -> ReferencedRaster: ...

    def read(
        self,
        assets: Optional[List[str]] = None,
        eo_bands: Optional[List[str]] = None,
        grid: Optional[GridProtocol] = None,
        resampling: Resampling = Resampling.nearest,
        nodatavals: NodataVals = None,
        x_axis_name: str = "x",
        y_axis_name: str = "y",
        raise_empty: bool = True,
        **kwargs,
    ) -> xr.Dataset:
        """Read bands and assets into xarray.

        One data variable is created per requested asset or EO band, named
        after it. The dataset attributes contain the item properties, the item
        id and ``_FillValue``.

        Parameters
        ----------
        assets, eo_bands
            Names of assets or EO bands to read; ``assets`` takes precedence
            when naming the data variables.
        grid, resampling, nodatavals, raise_empty, **kwargs
            Passed on unchanged to ``read_np_array()``.
        x_axis_name, y_axis_name
            Axis names handed to ``to_dataarray()``.
        """
        # developer info: all fancy stuff for special platforms like Sentinel-2
        # should be implemented in the respective read_np_array() methods which get
        # called by this method. No need to apply masks etc. here too.
        if isinstance(nodatavals, list):
            # only the first entry is used as dataset-wide fill value; an empty
            # list means "no fill value" instead of raising an IndexError
            nodataval = nodatavals[0] if nodatavals else None
        else:
            nodataval = nodatavals

        assets = assets or []
        eo_bands = eo_bands or []
        data_var_names = assets or eo_bands
        arrays = self.read_np_array(
            assets=assets,
            eo_bands=eo_bands,
            grid=grid,
            resampling=resampling,
            nodatavals=nodatavals,
            raise_empty=raise_empty,
            **kwargs,
        )
        # self.item rebuilds the pystac.Item on every access, so do it only once
        item = self.item
        return xr.Dataset(
            data_vars={
                data_var_name: to_dataarray(
                    asset_arr,
                    x_axis_name=x_axis_name,
                    y_axis_name=y_axis_name,
                    name=data_var_name,
                    attrs=dict(item_id=item.id),
                )
                for asset_arr, data_var_name in zip(arrays, data_var_names)
            },
            coords={},
            attrs=dict(item.properties, id=item.id, _FillValue=nodataval),
        )

    def read_np_array(
        self,
        assets: Optional[List[str]] = None,
        eo_bands: Optional[List[str]] = None,
        grid: Optional[GridProtocol] = None,
        resampling: Resampling = Resampling.nearest,
        nodatavals: NodataVals = None,
        raise_empty: bool = True,
        apply_offset: bool = True,
        apply_scale: bool = False,
        **kwargs,
    ) -> ma.MaskedArray:
        """Read assets or EO bands of the item into a masked array.

        The requested names are resolved to band locations and handed to
        ``item_to_np_array()`` together with the read options.
        """
        # NOTE(review): ``apply_scale`` and ``**kwargs`` are accepted but not
        # forwarded to item_to_np_array() - confirm whether this is intended.
        assets = assets or []
        eo_bands = eo_bands or []
        bands = assets or eo_bands
        logger.debug("%s: reading assets %s over %s", self, bands, grid)
        with Timer() as t:
            out = item_to_np_array(
                self.item,
                self.assets_eo_bands_to_band_locations(assets, eo_bands),
                grid=grid,
                resampling=resampling,
                nodatavals=nodatavals,
                raise_empty=raise_empty,
                apply_offset=apply_offset,
            )
        logger.debug("%s: read in %s", self, t)
        return out

    def empty_array(
        self,
        count: int,
        grid: GridProtocol,
        fill_value: int = 0,
        dtype: Optional[DTypeLike] = None,
    ) -> ma.MaskedArray:
        """Return a fully masked array of shape ``(count, *grid.shape)``.

        ``dtype`` falls back to ``default_dtype`` if not provided.
        """
        # named array_shape to avoid shadowing the imported shapely ``shape``
        array_shape = (count, *grid.shape)
        dtype = dtype or self.default_dtype
        return ma.MaskedArray(
            data=np.full(array_shape, fill_value=fill_value, dtype=dtype),
            mask=np.ones(array_shape, dtype=bool),
            fill_value=fill_value,
        )

    def get_property(self, property: str) -> Any:
        """Return an item property as resolved by ``get_item_property()``."""
        return get_item_property(self.item, property)

    def eo_bands_to_band_location(self, eo_bands: List[str]) -> List[BandLocation]:
        """Resolve EO band names to band locations within this item."""
        return eo_bands_to_band_locations(self.item, eo_bands)

    def assets_eo_bands_to_band_locations(
        self,
        assets: Optional[List[str]] = None,
        eo_bands: Optional[List[str]] = None,
    ) -> List[BandLocation]:
        """Convert either asset names or EO band names into band locations.

        Raises
        ------
        ValueError
            If both or neither of ``assets`` and ``eo_bands`` are provided.
        """
        assets = assets or []
        eo_bands = eo_bands or []
        if assets and eo_bands:
            raise ValueError("assets and eo_bands cannot be provided at the same time")
        if assets:
            return [BandLocation(asset_name=asset) for asset in assets]
        elif eo_bands:
            return self.eo_bands_to_band_location(eo_bands)
        else:
            raise ValueError("assets or eo_bands have to be provided")
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def eo_bands_to_band_locations(
    item: pystac.Item,
    eo_bands: List[str],
    role: Literal["data", "reflectance", "visual"] = "data",
) -> List[BandLocation]:
    """
    Resolve every EO band name to its location (asset and band index).

    The locations are returned in the same order as the given band names.
    """
    band_locations = []
    for eo_band in eo_bands:
        band_locations.append(find_eo_band(item, eo_band, role=role))
    return band_locations
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def find_eo_band(
    item: pystac.Item,
    eo_band_name: str,
    role: Literal["data", "reflectance", "visual"] = "data",
) -> BandLocation:
    """
    Locate the most appropriate band for an EO band name.

    All assets of the item are scanned. A band matches if its "name" or its
    "eo:common_name" equals the requested name and the asset either has no
    roles or carries the requested role. Ambiguous matches are narrowed down
    by the module's filter functions; the first filter leaving exactly one
    candidate wins.

    Raises KeyError if no band matches and ValueError if the match stays
    ambiguous.
    """
    results = []
    for asset_name, asset in item.assets.items():
        # search in eo:bands and alternatively in bands for eo:common_name
        band_infos = asset.extra_fields.get(
            "eo:bands", asset.extra_fields.get("bands", [])
        )
        # band indexes are 1-based
        for band_index, band_info in enumerate(band_infos, 1):
            name_matches = eo_band_name == band_info.get(
                "name"
            ) or eo_band_name == band_info.get("eo:common_name")
            if not name_matches:
                continue
            # if role is given, make sure it matches with desired role
            if asset.roles is not None and role not in asset.roles:
                continue
            results.append(
                BandLocation.from_asset(
                    name=asset_name,
                    band_index=band_index,
                    asset=asset,
                )
            )

    if not results:
        raise KeyError(f"EO band {eo_band_name} not found in item assets")

    if len(results) == 1:
        return results[0]

    # results are ambiguous: apply one filter after the other to the full list
    for matches in (_asset_name_equals_eo_name, _is_original_sampling):
        filtered_results = list(filter(matches, results))
        if len(filtered_results) == 1:
            return filtered_results[0]

    # no filter was able to single out one location
    raise ValueError(  # pragma: no cover
        f"EO band '{eo_band_name}' found in multiple assets: {', '.join(map(str, results))}"
    )
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _asset_name_equals_eo_name(band_location: BandLocation) -> bool:
|
|
240
|
+
return band_location.asset_name == band_location.eo_band_name
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _is_original_sampling(band_location: BandLocation) -> bool:
|
|
244
|
+
return band_location.roles == [] or "sampling:original" in band_location.roles
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def add_to_blacklist(path: MPathLike, blacklist: Optional[MPath] = None) -> None:
    """
    Append a path to the blacklist file unless it is already listed.

    If no blacklist is passed, the one from the mapchete_eo settings is used.
    Nothing happens if neither is available.
    """
    blacklist = blacklist or mapchete_eo_settings.blacklist
    if blacklist is None:
        return

    blacklist = MPath.from_inp(blacklist)
    path = MPath.from_inp(path)

    # make sure paths stay unique
    if str(path) in blacklist_products(blacklist):
        return

    logger.debug("add path %s to blacklist", str(path))
    try:
        with blacklist.open("a") as dst:
            dst.write(f"{path}\n")
    except FileNotFoundError:
        # appending failed because the file is missing, so create it instead
        with blacklist.open("w") as dst:
            dst.write(f"{path}\n")
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def blacklist_products(blacklist: Optional[MPathLike] = None) -> Set[str]:
    """
    Return the lines of the blacklist file as a set of strings.

    If no blacklist is passed, the one from the mapchete_eo settings is used.
    A ValueError is raised if neither is available; a missing file yields an
    empty set.
    """
    blacklist = blacklist or mapchete_eo_settings.blacklist
    if blacklist is None:
        raise ValueError("no blacklist is defined")
    blacklist = MPath.from_inp(blacklist)

    try:
        content = blacklist.read_text()
    except FileNotFoundError:
        logger.debug("%s does not exist, returning empty set", str(blacklist))
        return set()
    return set(content.splitlines())
|
mapchete_eo/protocols.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, List, Optional, Protocol
|
|
4
|
+
|
|
5
|
+
import numpy.ma as ma
|
|
6
|
+
import pystac
|
|
7
|
+
import xarray as xr
|
|
8
|
+
from mapchete.protocols import GridProtocol
|
|
9
|
+
from mapchete.types import Bounds, NodataVals
|
|
10
|
+
from rasterio.crs import CRS
|
|
11
|
+
from rasterio.enums import Resampling
|
|
12
|
+
|
|
13
|
+
from mapchete_eo.types import DateTimeLike
|
|
14
|
+
from mapchete.io.raster import ReferencedRaster
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class EOProductProtocol(Protocol):
    """Structural interface of an EO product wrapping a ``pystac.Item``."""

    # extent of the product
    bounds: Bounds
    # coordinate reference system of the product
    crs: CRS
    # geometry mapping of the product; may be None
    __geo_interface__: Optional[Dict[str, Any]]

    # alternate constructor: build a product from a STAC item
    @classmethod
    def from_stac_item(self, item: pystac.Item, **kwargs) -> EOProductProtocol: ...

    # return a mask as referenced raster
    def get_mask(self) -> ReferencedRaster: ...

    # read the given assets or EO bands into an xarray Dataset
    def read(
        self,
        assets: Optional[List[str]] = None,
        eo_bands: Optional[List[str]] = None,
        grid: Optional[GridProtocol] = None,
        resampling: Resampling = Resampling.nearest,
        nodatavals: NodataVals = None,
        x_axis_name: str = "x",
        y_axis_name: str = "y",
        **kwargs,
    ) -> xr.Dataset: ...

    # read the given assets or EO bands into a masked numpy array
    def read_np_array(
        self,
        assets: Optional[List[str]] = None,
        eo_bands: Optional[List[str]] = None,
        grid: Optional[GridProtocol] = None,
        resampling: Resampling = Resampling.nearest,
        nodatavals: NodataVals = None,
        **kwargs,
    ) -> ma.MaskedArray: ...

    # look up a single property by name
    def get_property(self, property: str) -> Any: ...

    # the STAC item this product wraps
    @property
    def item(self) -> pystac.Item: ...
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class DateTimeProtocol(Protocol):
    """Structural interface of any object exposing a ``datetime`` attribute."""

    datetime: DateTimeLike
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""
|
|
2
|
+
A catalog is an instance with a specific endpoint and a specific collection.
|
|
3
|
+
|
|
4
|
+
The catalog class aims to abstract product search as well as homogenization
|
|
5
|
+
of product metadata.
|
|
6
|
+
|
|
7
|
+
It helps the InputData class to find the input products and their metadata.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from mapchete_eo.search.stac_search import STACSearchCatalog
|
|
11
|
+
from mapchete_eo.search.stac_static import STACStaticCatalog
|
|
12
|
+
from mapchete_eo.search.utm_search import UTMSearchCatalog
|
|
13
|
+
|
|
14
|
+
__all__ = ["STACSearchCatalog", "STACStaticCatalog", "UTMSearchCatalog"]
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import Any, Callable, Dict, Generator, List, Optional, Type, Union
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
from pystac import Item, Catalog, CatalogType, Extent
|
|
8
|
+
from mapchete.path import MPath, MPathLike
|
|
9
|
+
from mapchete.types import Bounds
|
|
10
|
+
from pystac.collection import Collection
|
|
11
|
+
from pystac.stac_io import DefaultStacIO
|
|
12
|
+
from pystac_client import Client
|
|
13
|
+
from pystac_client.stac_api_io import StacApiIO
|
|
14
|
+
from rasterio.profiles import Profile
|
|
15
|
+
from shapely.geometry.base import BaseGeometry
|
|
16
|
+
|
|
17
|
+
from mapchete_eo.io.assets import get_assets, get_metadata_assets
|
|
18
|
+
from mapchete_eo.types import TimeRange
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class FSSpecStacIO(StacApiIO):
    """Custom class which allows I/O operations on object storage."""

    def read_text(self, source: MPathLike, *args, **kwargs) -> str:
        """Return the text content of source; extra arguments are ignored."""
        src_path = MPath.from_inp(source)
        return src_path.read_text()

    def write_text(self, dest: MPathLike, txt: str, *args, **kwargs) -> None:
        """Write txt to dest, creating the parent directory if it is missing."""
        dst_path = MPath.from_inp(dest)
        parent_exists = dst_path.parent.exists()
        if not parent_exists:
            dst_path.parent.makedirs(exist_ok=True)
        with dst_path.open("w") as dst:
            return dst.write(txt)

    # TODO: investigate in pystac why this has to be a staticmethod
    @staticmethod
    def save_json(dest: MPathLike, json_dict: dict, *args, **kwargs) -> None:
        """Write json_dict as indented JSON to dest, creating the parent directory if it is missing."""
        dst_path = MPath.from_inp(dest)
        parent_exists = dst_path.parent.exists()
        if not parent_exists:
            dst_path.parent.makedirs(exist_ok=True)
        with dst_path.open("w") as dst:
            # serialize only after the file was opened, as before
            return dst.write(json.dumps(json_dict, indent=2))
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class CatalogSearcher(ABC):
    """
    This class serves as a bridge between an Archive and a catalog implementation.

    Subclasses have to implement ``search()``. The attributes below are only
    declared here, not assigned.
    """

    eo_bands: List[str]
    id: str
    description: str
    stac_extensions: List[str]
    collections: List[str]
    # pydantic model class; presumably holds the search settings - TODO confirm
    config_cls: Type[BaseModel]

    # yield the STAC items found for the given time range(s), bounds and/or area
    @abstractmethod
    def search(
        self,
        time: Optional[Union[TimeRange, List[TimeRange]]] = None,
        bounds: Optional[Bounds] = None,
        area: Optional[BaseGeometry] = None,
        search_kwargs: Optional[Dict[str, Any]] = None,
    ) -> Generator[Item, None, None]: ...
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class StaticCatalogWriterMixin(CatalogSearcher):
    """Mixin adding the ability to dump search results as a static STAC catalog."""

    client: Client
    id: str
    description: str
    stac_extensions: List[str]

    @abstractmethod
    def get_collections(self) -> List[Collection]:  # pragma: no cover
        """Return the collections to be written into the static catalog."""
        ...

    def write_static_catalog(
        self,
        output_path: MPathLike,
        bounds: Optional[Bounds] = None,
        area: Optional[BaseGeometry] = None,
        time: Optional[TimeRange] = None,
        search_kwargs: Optional[Dict[str, Any]] = None,
        name: Optional[str] = None,
        description: Optional[str] = None,
        assets: Optional[List[str]] = None,
        assets_dst_resolution: Union[None, float, int] = None,
        assets_convert_profile: Optional[Profile] = None,
        copy_metadata: bool = False,
        metadata_parser_classes: Optional[tuple] = None,
        overwrite: bool = False,
        # NOTE(review): this default instance is created once at definition time
        # and shared by all calls which do not pass their own stac_io
        stac_io: DefaultStacIO = FSSpecStacIO(),
        progress_callback: Optional[Callable] = None,
    ) -> MPath:
        """Dump static version of current items.

        Parameters
        ----------
        output_path
            Directory the catalog is written to; the catalog file itself is
            ``catalog.json`` below this path.
        bounds, area, time, search_kwargs
            Passed on to ``search()`` to determine the items.
        name, description
            Catalog id and description; default to ``self.id`` and a text
            derived from ``self.description``.
        assets
            Asset names handed to ``get_assets()`` for every item.
        assets_dst_resolution, assets_convert_profile, overwrite
            Passed on to ``get_assets()`` and ``get_metadata_assets()``.
        copy_metadata, metadata_parser_classes
            If ``copy_metadata`` is set, ``get_metadata_assets()`` is called
            for every item.
        stac_io
            I/O object used when saving the catalog.
        progress_callback
            Optional callable invoked as ``progress_callback(n=..., total=...)``
            after each processed item.

        Returns
        -------
        MPath
            Path of the written ``catalog.json``.
        """
        output_path = MPath.from_inp(output_path)
        assets = assets or []
        # initialize catalog
        catalog_json = output_path / "catalog.json"
        if catalog_json.exists():
            # an existing catalog is only used to look up its collections
            logger.debug("open existing catalog %s", str(catalog_json))
            client = Client.from_file(catalog_json)
            # catalog = pystac.Catalog.from_file(catalog_json)
            existing_collections = list(client.get_collections())
        else:
            existing_collections = []
        # a new catalog object is created in either case
        catalog = Catalog(
            name or f"{self.id}",
            description or f"Static subset of {self.description}",
            stac_extensions=self.stac_extensions,
            href=str(catalog_json),
            catalog_type=CatalogType.SELF_CONTAINED,
        )
        # materialize the search results because they are iterated once per
        # collection and their count is reported to the progress callback
        src_items = list(
            self.search(
                time=time, bounds=bounds, area=area, search_kwargs=search_kwargs
            )
        )
        # NOTE(review): every found item is added to every collection returned
        # by get_collections() - confirm this is intended for multiple collections
        for collection in self.get_collections():
            # collect all items and download assets if required
            items: List[Item] = []
            item_ids = set()
            for n, item in enumerate(src_items, 1):
                logger.debug("found item %s", item)
                # work on a copy so the source item stays untouched
                item = item.clone()
                if assets:
                    logger.debug("get assets %s", assets)
                    item = get_assets(
                        item,
                        assets,
                        output_path / collection.id / item.id,
                        resolution=assets_dst_resolution,
                        convert_profile=assets_convert_profile,
                        overwrite=overwrite,
                        ignore_if_exists=True,
                    )
                if copy_metadata:
                    item = get_metadata_assets(
                        item,
                        output_path / collection.id / item.id,
                        metadata_parser_classes=metadata_parser_classes,
                        resolution=assets_dst_resolution,
                        convert_profile=assets_convert_profile,
                        overwrite=overwrite,
                    )
                # this has to be set to None, otherwise pystac will mess up the asset paths
                # after normalizing
                item.set_self_href(None)

                items.append(item)
                item_ids.add(item.id)

                if progress_callback:
                    progress_callback(n=n, total=len(src_items))

            # keep items which are already stored below an existing collection of
            # the same id but were not part of the current search result
            for existing_collection in existing_collections:
                if existing_collection.id == collection.id:
                    logger.debug("try to find unregistered items in collection")
                    collection_root_path = MPath.from_inp(
                        existing_collection.get_self_href()
                    ).parent
                    for subpath in collection_root_path.ls():
                        if subpath.is_directory():
                            try:
                                # the item JSON is expected to be named like its
                                # directory
                                item = Item.from_file(
                                    subpath / subpath.with_suffix(".json").name
                                )
                                if item.id not in item_ids:
                                    logger.debug(
                                        "add existing item with id %s", item.id
                                    )
                                    items.append(item)
                                    item_ids.add(item.id)
                            except FileNotFoundError:
                                # directories without an item JSON are skipped
                                pass
                    break
            # create collection and copy metadata
            logger.debug("create new collection")
            out_collection = Collection(
                id=collection.id,
                extent=Extent.from_items(items),
                description=collection.description,
                title=collection.title,
                stac_extensions=collection.stac_extensions,
                license=collection.license,
                keywords=collection.keywords,
                providers=collection.providers,
                summaries=collection.summaries,
                extra_fields=collection.extra_fields,
                catalog_type=CatalogType.SELF_CONTAINED,
            )

            # finally, add all items to collection
            for item in items:
                out_collection.add_item(item)

            out_collection.update_extent_from_items()

            catalog.add_child(out_collection)

        logger.debug("write catalog to %s", output_path)
        catalog.normalize_hrefs(str(output_path))
        catalog.make_all_asset_hrefs_relative()
        catalog.save(dest_href=str(output_path), stac_io=stac_io)

        return catalog_json
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def filter_items(
    items: Generator[Item, None, None],
    cloud_cover_field: str = "eo:cloud_cover",
    max_cloud_cover: float = 100.0,
) -> Generator[Item, None, None]:
    """
    Yield only items whose cloud cover does not exceed ``max_cloud_cover``.

    Items without a cloud cover value, i.e. where the property is missing or
    set to None, are treated as having a cloud cover of 0.0 and are yielded.

    Only cloud cover is supported for now. This can and should be adapted to an
    arbitrary filter field and value: both would be defined in the corresponding
    configs in search/config.py and passed down to the individual search
    approaches via said config and this function.
    """
    for item in items:
        cloud_cover = item.properties.get(cloud_cover_field)
        # a null value cannot be compared with "<=", handle it like a missing one
        if cloud_cover is None:
            cloud_cover = 0.0
        if cloud_cover <= max_cloud_cover:
            yield item
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from mapchete.path import MPath, MPathLike
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class StacSearchConfig(BaseModel):
    """Settings model with defaults for the STAC search catalog."""

    # presumably the upper cloud cover limit (percent) for found items - TODO confirm
    max_cloud_cover: float = 100.0
    # NOTE(review): the chunking and page size values are not used in this
    # module; their exact meaning has to be confirmed against the search code
    catalog_chunk_threshold: int = 10_000
    catalog_chunk_zoom: int = 5
    catalog_pagesize: int = 100
    # presumably a buffer applied to item footprints; units unknown - TODO confirm
    footprint_buffer: float = 0
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class StacStaticConfig(BaseModel):
    """Settings model with defaults for the static STAC catalog search."""

    # presumably the upper cloud cover limit (percent) for found items - TODO confirm
    max_cloud_cover: float = 100.0
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class UTMSearchConfig(BaseModel):
    """Settings model with defaults for the UTM search catalog."""

    # presumably the upper cloud cover limit (percent) for found items - TODO confirm
    max_cloud_cover: float = 100.0

    # known collections, each given as a collection id and the path of its JSON
    # NOTE(review): the S1_GRD entry uses the id "sentinel-s1-l1c" - confirm
    sinergise_aws_collections: dict = dict(
        S2_L2A=dict(
            id="sentinel-s2-l2a",
            path=MPath(
                "https://sentinel-s2-l2a-stac.s3.amazonaws.com/sentinel-s2-l2a.json"
            ),
        ),
        S2_L1C=dict(
            id="sentinel-s2-l1c",
            path=MPath(
                "https://sentinel-s2-l1c-stac.s3.amazonaws.com/sentinel-s2-l1c.json"
            ),
        ),
        S1_GRD=dict(
            id="sentinel-s1-l1c",
            path=MPath(
                "https://sentinel-s1-l1c-stac.s3.amazonaws.com/sentinel-s1-l1c.json"
            ),
        ),
    )
    # optional path of a search index; unset by default
    search_index: Optional[MPathLike] = None
|