mapchete-eo 2025.7.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. mapchete_eo/__init__.py +1 -0
  2. mapchete_eo/archives/__init__.py +0 -0
  3. mapchete_eo/archives/base.py +65 -0
  4. mapchete_eo/array/__init__.py +0 -0
  5. mapchete_eo/array/buffer.py +16 -0
  6. mapchete_eo/array/color.py +29 -0
  7. mapchete_eo/array/convert.py +157 -0
  8. mapchete_eo/base.py +528 -0
  9. mapchete_eo/blacklist.txt +175 -0
  10. mapchete_eo/cli/__init__.py +30 -0
  11. mapchete_eo/cli/bounds.py +22 -0
  12. mapchete_eo/cli/options_arguments.py +243 -0
  13. mapchete_eo/cli/s2_brdf.py +77 -0
  14. mapchete_eo/cli/s2_cat_results.py +146 -0
  15. mapchete_eo/cli/s2_find_broken_products.py +93 -0
  16. mapchete_eo/cli/s2_jp2_static_catalog.py +166 -0
  17. mapchete_eo/cli/s2_mask.py +71 -0
  18. mapchete_eo/cli/s2_mgrs.py +45 -0
  19. mapchete_eo/cli/s2_rgb.py +114 -0
  20. mapchete_eo/cli/s2_verify.py +129 -0
  21. mapchete_eo/cli/static_catalog.py +123 -0
  22. mapchete_eo/eostac.py +30 -0
  23. mapchete_eo/exceptions.py +87 -0
  24. mapchete_eo/geometry.py +271 -0
  25. mapchete_eo/image_operations/__init__.py +12 -0
  26. mapchete_eo/image_operations/color_correction.py +136 -0
  27. mapchete_eo/image_operations/compositing.py +247 -0
  28. mapchete_eo/image_operations/dtype_scale.py +43 -0
  29. mapchete_eo/image_operations/fillnodata.py +130 -0
  30. mapchete_eo/image_operations/filters.py +319 -0
  31. mapchete_eo/image_operations/linear_normalization.py +81 -0
  32. mapchete_eo/image_operations/sigmoidal.py +114 -0
  33. mapchete_eo/io/__init__.py +37 -0
  34. mapchete_eo/io/assets.py +492 -0
  35. mapchete_eo/io/items.py +147 -0
  36. mapchete_eo/io/levelled_cubes.py +228 -0
  37. mapchete_eo/io/path.py +144 -0
  38. mapchete_eo/io/products.py +413 -0
  39. mapchete_eo/io/profiles.py +45 -0
  40. mapchete_eo/known_catalogs.py +42 -0
  41. mapchete_eo/platforms/sentinel2/__init__.py +17 -0
  42. mapchete_eo/platforms/sentinel2/archives.py +190 -0
  43. mapchete_eo/platforms/sentinel2/bandpass_adjustment.py +104 -0
  44. mapchete_eo/platforms/sentinel2/brdf/__init__.py +8 -0
  45. mapchete_eo/platforms/sentinel2/brdf/config.py +32 -0
  46. mapchete_eo/platforms/sentinel2/brdf/correction.py +260 -0
  47. mapchete_eo/platforms/sentinel2/brdf/hls.py +251 -0
  48. mapchete_eo/platforms/sentinel2/brdf/models.py +44 -0
  49. mapchete_eo/platforms/sentinel2/brdf/protocols.py +27 -0
  50. mapchete_eo/platforms/sentinel2/brdf/ross_thick.py +136 -0
  51. mapchete_eo/platforms/sentinel2/brdf/sun_angle_arrays.py +76 -0
  52. mapchete_eo/platforms/sentinel2/config.py +181 -0
  53. mapchete_eo/platforms/sentinel2/driver.py +78 -0
  54. mapchete_eo/platforms/sentinel2/masks.py +325 -0
  55. mapchete_eo/platforms/sentinel2/metadata_parser.py +734 -0
  56. mapchete_eo/platforms/sentinel2/path_mappers/__init__.py +29 -0
  57. mapchete_eo/platforms/sentinel2/path_mappers/base.py +56 -0
  58. mapchete_eo/platforms/sentinel2/path_mappers/earthsearch.py +34 -0
  59. mapchete_eo/platforms/sentinel2/path_mappers/metadata_xml.py +135 -0
  60. mapchete_eo/platforms/sentinel2/path_mappers/sinergise.py +105 -0
  61. mapchete_eo/platforms/sentinel2/preprocessing_tasks.py +26 -0
  62. mapchete_eo/platforms/sentinel2/processing_baseline.py +160 -0
  63. mapchete_eo/platforms/sentinel2/product.py +669 -0
  64. mapchete_eo/platforms/sentinel2/types.py +109 -0
  65. mapchete_eo/processes/__init__.py +0 -0
  66. mapchete_eo/processes/config.py +51 -0
  67. mapchete_eo/processes/dtype_scale.py +112 -0
  68. mapchete_eo/processes/eo_to_xarray.py +19 -0
  69. mapchete_eo/processes/merge_rasters.py +235 -0
  70. mapchete_eo/product.py +278 -0
  71. mapchete_eo/protocols.py +56 -0
  72. mapchete_eo/search/__init__.py +14 -0
  73. mapchete_eo/search/base.py +222 -0
  74. mapchete_eo/search/config.py +42 -0
  75. mapchete_eo/search/s2_mgrs.py +314 -0
  76. mapchete_eo/search/stac_search.py +251 -0
  77. mapchete_eo/search/stac_static.py +236 -0
  78. mapchete_eo/search/utm_search.py +251 -0
  79. mapchete_eo/settings.py +24 -0
  80. mapchete_eo/sort.py +48 -0
  81. mapchete_eo/time.py +53 -0
  82. mapchete_eo/types.py +73 -0
  83. mapchete_eo-2025.7.0.dist-info/METADATA +38 -0
  84. mapchete_eo-2025.7.0.dist-info/RECORD +87 -0
  85. mapchete_eo-2025.7.0.dist-info/WHEEL +5 -0
  86. mapchete_eo-2025.7.0.dist-info/entry_points.txt +11 -0
  87. mapchete_eo-2025.7.0.dist-info/licenses/LICENSE +21 -0
mapchete_eo/product.py ADDED
@@ -0,0 +1,278 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Any, List, Literal, Optional, Set
5
+
6
+ import numpy as np
7
+ import numpy.ma as ma
8
+ import pystac
9
+ import xarray as xr
10
+ from mapchete import Timer
11
+ from mapchete.io.raster import ReferencedRaster
12
+ from mapchete.path import MPath, MPathLike
13
+ from mapchete.protocols import GridProtocol
14
+ from mapchete.types import Bounds, NodataVals
15
+ from numpy.typing import DTypeLike
16
+ from rasterio.enums import Resampling
17
+ from shapely.geometry import shape
18
+
19
+ from mapchete_eo.array.convert import to_dataarray
20
+ from mapchete_eo.io import get_item_property, item_to_np_array
21
+ from mapchete_eo.protocols import EOProductProtocol
22
+ from mapchete_eo.settings import mapchete_eo_settings
23
+ from mapchete_eo.types import BandLocation
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
class EOProduct(EOProductProtocol):
    """Wrapper class around a pystac.Item which provides read functions.

    The item is kept in its dictionary form (``item_dict``); the ``item``
    property rebuilds a ``pystac.Item`` from that dictionary on every access.
    """

    default_dtype: DTypeLike = np.uint16

    def __init__(self, item: pystac.Item):
        """Store the item as a dictionary and derive geometry, bounds and CRS."""
        self.item_dict = item.to_dict()
        self.__geo_interface__ = self.item.geometry
        # shapely's shape() reads the __geo_interface__ attribute set above
        self.bounds = Bounds.from_inp(shape(self))
        self.crs = mapchete_eo_settings.default_catalog_crs

    def __repr__(self):
        return f"<EOProduct product_id={self.item.id}>"

    def clear_cached_data(self):
        """Do nothing; this class holds no cached data."""
        pass

    @property
    def item(self) -> pystac.Item:
        """Rebuild the pystac.Item from the stored dictionary."""
        return pystac.Item.from_dict(self.item_dict)

    @classmethod
    def from_stac_item(cls, item: pystac.Item, **kwargs) -> EOProduct:
        """Create an EOProduct from a STAC item; extra keyword arguments are ignored."""
        return EOProduct(item)

    def get_mask(self) -> ReferencedRaster:
        """Placeholder without implementation in this class."""
        ...

    def read(
        self,
        assets: Optional[List[str]] = None,
        eo_bands: Optional[List[str]] = None,
        grid: Optional[GridProtocol] = None,
        resampling: Resampling = Resampling.nearest,
        nodatavals: NodataVals = None,
        x_axis_name: str = "x",
        y_axis_name: str = "y",
        raise_empty: bool = True,
        **kwargs,
    ) -> xr.Dataset:
        """Read bands and assets into xarray.

        One data variable is created per requested asset (or EO band if no
        assets are given). The dataset attributes contain the item properties,
        the item id and ``_FillValue``, which is the first entry of
        ``nodatavals`` if a list is given, otherwise ``nodatavals`` itself.
        """
        # developer info: all fancy stuff for special platforms like Sentinel-2
        # should be implemented in the respective read_np_array() methods which get
        # called by this method. No need to apply masks etc. here too.
        if isinstance(nodatavals, list):
            # an empty list carries no fill value
            nodataval = nodatavals[0] if nodatavals else None
        else:
            nodataval = nodatavals

        assets = assets or []
        eo_bands = eo_bands or []
        data_var_names = assets or eo_bands
        arrays = self.read_np_array(
            assets=assets,
            eo_bands=eo_bands,
            grid=grid,
            resampling=resampling,
            nodatavals=nodatavals,
            raise_empty=raise_empty,
            **kwargs,
        )
        # the item property deserializes item_dict on each access, so fetch it
        # once instead of once per band
        item = self.item
        return xr.Dataset(
            data_vars={
                data_var_name: to_dataarray(
                    asset_arr,
                    x_axis_name=x_axis_name,
                    y_axis_name=y_axis_name,
                    name=data_var_name,
                    attrs=dict(item_id=item.id),
                )
                for asset_arr, data_var_name in zip(arrays, data_var_names)
            },
            coords={},
            attrs=dict(item.properties, id=item.id, _FillValue=nodataval),
        )

    def read_np_array(
        self,
        assets: Optional[List[str]] = None,
        eo_bands: Optional[List[str]] = None,
        grid: Optional[GridProtocol] = None,
        resampling: Resampling = Resampling.nearest,
        nodatavals: NodataVals = None,
        raise_empty: bool = True,
        apply_offset: bool = True,
        apply_scale: bool = False,
        **kwargs,
    ) -> ma.MaskedArray:
        """Read the requested assets or EO bands into a masked array.

        NOTE(review): ``apply_scale`` and ``**kwargs`` are accepted but not
        forwarded to ``item_to_np_array`` here.
        """
        assets = assets or []
        eo_bands = eo_bands or []
        bands = assets or eo_bands
        logger.debug("%s: reading assets %s over %s", self, bands, grid)
        with Timer() as t:
            out = item_to_np_array(
                self.item,
                self.assets_eo_bands_to_band_locations(assets, eo_bands),
                grid=grid,
                resampling=resampling,
                nodatavals=nodatavals,
                raise_empty=raise_empty,
                apply_offset=apply_offset,
            )
        logger.debug("%s: read in %s", self, t)
        return out

    def empty_array(
        self,
        count: int,
        grid: GridProtocol,
        fill_value: int = 0,
        dtype: Optional[DTypeLike] = None,
    ) -> ma.MaskedArray:
        """Return a fully masked array of shape ``(count, *grid.shape)``.

        ``dtype`` falls back to ``default_dtype`` when not given.
        """
        # named array_shape so the imported shapely shape() is not shadowed
        array_shape = (count, *grid.shape)
        dtype = dtype or self.default_dtype
        return ma.MaskedArray(
            data=np.full(array_shape, fill_value=fill_value, dtype=dtype),
            mask=np.ones(array_shape, dtype=bool),
            fill_value=fill_value,
        )

    def get_property(self, property: str) -> Any:
        """Look up a property on the item via ``get_item_property``."""
        return get_item_property(self.item, property)

    def eo_bands_to_band_location(self, eo_bands: List[str]) -> List[BandLocation]:
        """Resolve EO band names to band locations within this item."""
        return eo_bands_to_band_locations(self.item, eo_bands)

    def assets_eo_bands_to_band_locations(
        self,
        assets: Optional[List[str]] = None,
        eo_bands: Optional[List[str]] = None,
    ) -> List[BandLocation]:
        """Translate either asset names or EO band names into band locations.

        Raises:
            ValueError: if both or neither of ``assets`` and ``eo_bands`` are given.
        """
        assets = assets or []
        eo_bands = eo_bands or []
        if assets and eo_bands:
            raise ValueError("assets and eo_bands cannot be provided at the same time")
        if assets:
            return [BandLocation(asset_name=asset) for asset in assets]
        elif eo_bands:
            return self.eo_bands_to_band_location(eo_bands)
        else:
            raise ValueError("assets or eo_bands have to be provided")
172
+
173
+
174
def eo_bands_to_band_locations(
    item: pystac.Item,
    eo_bands: List[str],
    role: Literal["data", "reflectance", "visual"] = "data",
) -> List[BandLocation]:
    """
    Resolve each EO band name to its location (asset and band index) in the item.

    The returned list has the same order as ``eo_bands``.
    """
    locations: List[BandLocation] = []
    for band_name in eo_bands:
        locations.append(find_eo_band(item, band_name, role=role))
    return locations
183
+
184
+
185
def find_eo_band(
    item: pystac.Item,
    eo_band_name: str,
    role: Literal["data", "reflectance", "visual"] = "data",
) -> BandLocation:
    """
    Locate the most appropriate band for an EO band name.

    All assets are scanned; within each asset the "eo:bands" entries (or, if
    absent, the "bands" entries) are compared against the requested name.
    Band indexes are counted starting at 1.

    Raises:
        KeyError: if no band matches.
        ValueError: if several bands match and none of the tie-breakers
            leaves exactly one candidate.
    """
    results = []
    for asset_key, asset in item.assets.items():
        extra_fields = asset.extra_fields
        # search in eo:bands and alternatively in bands for eo:common_name
        band_infos = extra_fields.get("eo:bands", extra_fields.get("bands", []))
        for band_index, band_info in enumerate(band_infos, 1):
            name_matches = eo_band_name in (
                band_info.get("name"),
                band_info.get("eo:common_name"),
            )
            if not name_matches:
                continue
            # assets without roles are accepted, otherwise the role has to match
            if asset.roles is not None and role not in asset.roles:
                continue
            results.append(
                BandLocation.from_asset(
                    name=asset_key,
                    band_index=band_index,
                    asset=asset,
                )
            )

    if not results:
        raise KeyError(f"EO band {eo_band_name} not found in item assets")

    if len(results) == 1:
        return results[0]

    # ambiguous: apply the tie-breakers one after the other and take the first
    # one which leaves exactly one candidate
    for tie_breaker in (_asset_name_equals_eo_name, _is_original_sampling):
        remaining = [candidate for candidate in results if tie_breaker(candidate)]
        if len(remaining) == 1:
            return remaining[0]

    raise ValueError(  # pragma: no cover
        f"EO band '{eo_band_name}' found in multiple assets: {', '.join(map(str, results))}"
    )
237
+
238
+
239
+ def _asset_name_equals_eo_name(band_location: BandLocation) -> bool:
240
+ return band_location.asset_name == band_location.eo_band_name
241
+
242
+
243
+ def _is_original_sampling(band_location: BandLocation) -> bool:
244
+ return band_location.roles == [] or "sampling:original" in band_location.roles
245
+
246
+
247
def add_to_blacklist(path: MPathLike, blacklist: Optional[MPath] = None) -> None:
    """
    Append a path to the blacklist file unless it is already listed.

    Falls back to the blacklist configured in the settings; if neither is
    set, nothing happens.
    """
    target = blacklist or mapchete_eo_settings.blacklist
    if target is None:
        return

    target = MPath.from_inp(target)
    entry = MPath.from_inp(path)

    # make sure paths stay unique
    if str(entry) in blacklist_products(target):
        return

    logger.debug("add path %s to blacklist", str(entry))
    try:
        with target.open("a") as dst:
            dst.write(f"{entry}\n")
    except FileNotFoundError:
        # appending failed because the file is missing, so create it instead
        with target.open("w") as dst:
            dst.write(f"{entry}\n")
266
+
267
+
268
def blacklist_products(blacklist: Optional[MPathLike] = None) -> Set[str]:
    """
    Return the lines of the blacklist file as a set.

    Falls back to the blacklist configured in the settings. A missing file
    yields an empty set.

    Raises:
        ValueError: if no blacklist is given and none is configured.
    """
    source = blacklist or mapchete_eo_settings.blacklist
    if source is None:
        raise ValueError("no blacklist is defined")
    source = MPath.from_inp(source)

    try:
        content = source.read_text()
    except FileNotFoundError:
        logger.debug("%s does not exist, returning empty set", str(source))
        return set()
    return set(content.splitlines())
@@ -0,0 +1,56 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict, List, Optional, Protocol
4
+
5
+ import numpy.ma as ma
6
+ import pystac
7
+ import xarray as xr
8
+ from mapchete.protocols import GridProtocol
9
+ from mapchete.types import Bounds, NodataVals
10
+ from rasterio.crs import CRS
11
+ from rasterio.enums import Resampling
12
+
13
+ from mapchete_eo.types import DateTimeLike
14
+ from mapchete.io.raster import ReferencedRaster
15
+
16
+
17
class EOProductProtocol(Protocol):
    """Structural interface of an EO product built from a STAC item."""

    bounds: Bounds
    crs: CRS
    __geo_interface__: Optional[Dict[str, Any]]

    @classmethod
    def from_stac_item(cls, item: pystac.Item, **kwargs) -> EOProductProtocol:
        """Create a product from a STAC item."""
        ...

    def get_mask(self) -> ReferencedRaster:
        """Return the product mask as a referenced raster."""
        ...

    def read(
        self,
        assets: Optional[List[str]] = None,
        eo_bands: Optional[List[str]] = None,
        grid: Optional[GridProtocol] = None,
        resampling: Resampling = Resampling.nearest,
        nodatavals: NodataVals = None,
        x_axis_name: str = "x",
        y_axis_name: str = "y",
        **kwargs,
    ) -> xr.Dataset:
        """Read assets or EO bands into an xarray dataset."""
        ...

    def read_np_array(
        self,
        assets: Optional[List[str]] = None,
        eo_bands: Optional[List[str]] = None,
        grid: Optional[GridProtocol] = None,
        resampling: Resampling = Resampling.nearest,
        nodatavals: NodataVals = None,
        **kwargs,
    ) -> ma.MaskedArray:
        """Read assets or EO bands into a masked array."""
        ...

    def get_property(self, property: str) -> Any:
        """Return the value of a product property."""
        ...

    @property
    def item(self) -> pystac.Item:
        """The STAC item behind the product."""
        ...
53
+
54
+
55
class DateTimeProtocol(Protocol):
    """Structural interface of any object exposing a ``datetime`` attribute."""

    datetime: DateTimeLike
@@ -0,0 +1,14 @@
1
+ """
2
+ A catalog is an instance with a specific endpoint and a specific collection.
3
+
4
+ The catalog class aims to abstract product search as well as homogenization
5
+ of product metadata.
6
+
7
+ It helps the InputData class to find the input products and their metadata.
8
+ """
9
+
10
+ from mapchete_eo.search.stac_search import STACSearchCatalog
11
+ from mapchete_eo.search.stac_static import STACStaticCatalog
12
+ from mapchete_eo.search.utm_search import UTMSearchCatalog
13
+
14
+ __all__ = ["STACSearchCatalog", "STACStaticCatalog", "UTMSearchCatalog"]
@@ -0,0 +1,222 @@
1
+ import json
2
+ import logging
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any, Callable, Dict, Generator, List, Optional, Type, Union
5
+
6
+ from pydantic import BaseModel
7
+ from pystac import Item, Catalog, CatalogType, Extent
8
+ from mapchete.path import MPath, MPathLike
9
+ from mapchete.types import Bounds
10
+ from pystac.collection import Collection
11
+ from pystac.stac_io import DefaultStacIO
12
+ from pystac_client import Client
13
+ from pystac_client.stac_api_io import StacApiIO
14
+ from rasterio.profiles import Profile
15
+ from shapely.geometry.base import BaseGeometry
16
+
17
+ from mapchete_eo.io.assets import get_assets, get_metadata_assets
18
+ from mapchete_eo.types import TimeRange
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class FSSpecStacIO(StacApiIO):
    """Custom class which allows I/O operations on object storage."""

    def read_text(self, source: MPathLike, *args, **kwargs) -> str:
        """Return the text content of ``source``; extra arguments are ignored."""
        src_path = MPath.from_inp(source)
        return src_path.read_text()

    def write_text(self, dest: MPathLike, txt: str, *args, **kwargs) -> None:
        """Write ``txt`` to ``dest``, creating the parent directory if missing."""
        target = MPath.from_inp(dest)
        parent = target.parent
        if not parent.exists():
            parent.makedirs(exist_ok=True)
        with target.open("w") as dst:
            return dst.write(txt)

    # TODO: investigate in pystac why this has to be a staticmethod
    @staticmethod
    def save_json(dest: MPathLike, json_dict: dict, *args, **kwargs) -> None:
        """Dump ``json_dict`` as JSON indented by 2 to ``dest``.

        The parent directory is created if it is missing.
        """
        target = MPath.from_inp(dest)
        parent = target.parent
        if not parent.exists():
            parent.makedirs(exist_ok=True)
        with target.open("w") as dst:
            return dst.write(json.dumps(json_dict, indent=2))
44
+
45
+
46
class CatalogSearcher(ABC):
    """
    This class serves as a bridge between an Archive and a catalog implementation.

    Subclasses have to implement search(), which yields the matching STAC items.
    """

    eo_bands: List[str]
    id: str
    description: str
    stac_extensions: List[str]
    collections: List[str]
    # pydantic model class; presumably holds the searcher configuration — TODO confirm
    config_cls: Type[BaseModel]

    @abstractmethod
    def search(
        self,
        time: Optional[Union[TimeRange, List[TimeRange]]] = None,
        bounds: Optional[Bounds] = None,
        area: Optional[BaseGeometry] = None,
        search_kwargs: Optional[Dict[str, Any]] = None,
    ) -> Generator[Item, None, None]: ...
66
+
67
+
68
class StaticCatalogWriterMixin(CatalogSearcher):
    """Mixin which dumps search results into a self-contained static STAC catalog."""

    client: Client
    id: str
    description: str
    stac_extensions: List[str]

    @abstractmethod
    def get_collections(self) -> List[Collection]:  # pragma: no cover
        """Return the collections to be written into the static catalog."""
        ...

    def write_static_catalog(
        self,
        output_path: MPathLike,
        bounds: Optional[Bounds] = None,
        area: Optional[BaseGeometry] = None,
        time: Optional[TimeRange] = None,
        search_kwargs: Optional[Dict[str, Any]] = None,
        name: Optional[str] = None,
        description: Optional[str] = None,
        assets: Optional[List[str]] = None,
        assets_dst_resolution: Union[None, float, int] = None,
        assets_convert_profile: Optional[Profile] = None,
        copy_metadata: bool = False,
        metadata_parser_classes: Optional[tuple] = None,
        overwrite: bool = False,
        stac_io: Optional[DefaultStacIO] = None,
        progress_callback: Optional[Callable] = None,
    ) -> MPath:
        """Dump static version of current items.

        Items found by search() are cloned, optionally extended by copies of
        their assets and metadata assets below
        ``output_path/<collection id>/<item id>`` and registered in one output
        collection per entry of get_collections(). If ``catalog.json`` already
        exists in ``output_path``, items stored in subdirectories of a
        collection with the same id are added as well unless their id was
        already found by the search.

        Args:
            output_path: directory the catalog is written to.
            bounds, area, time, search_kwargs: passed on to search().
            name: catalog id; defaults to ``self.id``.
            description: catalog description; defaults to
                "Static subset of <self.description>".
            assets: names of assets to be fetched via get_assets().
            assets_dst_resolution, assets_convert_profile, overwrite: passed on
                to get_assets() and get_metadata_assets().
            copy_metadata: also fetch metadata assets.
            metadata_parser_classes: passed on to get_metadata_assets().
            stac_io: I/O object used to save the catalog; a new FSSpecStacIO
                is created per call if omitted.
            progress_callback: called with ``n`` and ``total`` after each item.

        Returns:
            Path of the ``catalog.json`` file.
        """
        output_path = MPath.from_inp(output_path)
        assets = assets or []
        # initialize catalog
        catalog_json = output_path / "catalog.json"
        if catalog_json.exists():
            logger.debug("open existing catalog %s", str(catalog_json))
            client = Client.from_file(catalog_json)
            # catalog = pystac.Catalog.from_file(catalog_json)
            existing_collections = list(client.get_collections())
        else:
            existing_collections = []
        catalog = Catalog(
            name or f"{self.id}",
            description or f"Static subset of {self.description}",
            stac_extensions=self.stac_extensions,
            href=str(catalog_json),
            catalog_type=CatalogType.SELF_CONTAINED,
        )
        src_items = list(
            self.search(
                time=time, bounds=bounds, area=area, search_kwargs=search_kwargs
            )
        )
        for collection in self.get_collections():
            # collect all items and download assets if required
            items: List[Item] = []
            item_ids = set()
            for n, item in enumerate(src_items, 1):
                logger.debug("found item %s", item)
                item = item.clone()
                if assets:
                    logger.debug("get assets %s", assets)
                    item = get_assets(
                        item,
                        assets,
                        output_path / collection.id / item.id,
                        resolution=assets_dst_resolution,
                        convert_profile=assets_convert_profile,
                        overwrite=overwrite,
                        ignore_if_exists=True,
                    )
                if copy_metadata:
                    item = get_metadata_assets(
                        item,
                        output_path / collection.id / item.id,
                        metadata_parser_classes=metadata_parser_classes,
                        resolution=assets_dst_resolution,
                        convert_profile=assets_convert_profile,
                        overwrite=overwrite,
                    )
                # this has to be set to None, otherwise pystac will mess up the asset paths
                # after normalizing
                item.set_self_href(None)

                items.append(item)
                item_ids.add(item.id)

                if progress_callback:
                    progress_callback(n=n, total=len(src_items))

            for existing_collection in existing_collections:
                if existing_collection.id == collection.id:
                    logger.debug("try to find unregistered items in collection")
                    collection_root_path = MPath.from_inp(
                        existing_collection.get_self_href()
                    ).parent
                    for subpath in collection_root_path.ls():
                        if subpath.is_directory():
                            try:
                                # item JSON is expected at <directory>/<directory name>.json
                                item = Item.from_file(
                                    subpath / subpath.with_suffix(".json").name
                                )
                                if item.id not in item_ids:
                                    logger.debug(
                                        "add existing item with id %s", item.id
                                    )
                                    items.append(item)
                                    item_ids.add(item.id)
                            except FileNotFoundError:
                                # directory without item JSON: nothing to register
                                pass
                    break
            # create collection and copy metadata
            logger.debug("create new collection")
            out_collection = Collection(
                id=collection.id,
                extent=Extent.from_items(items),
                description=collection.description,
                title=collection.title,
                stac_extensions=collection.stac_extensions,
                license=collection.license,
                keywords=collection.keywords,
                providers=collection.providers,
                summaries=collection.summaries,
                extra_fields=collection.extra_fields,
                catalog_type=CatalogType.SELF_CONTAINED,
            )

            # finally, add all items to collection
            for item in items:
                out_collection.add_item(item)

            out_collection.update_extent_from_items()

            catalog.add_child(out_collection)

        logger.debug("write catalog to %s", output_path)
        catalog.normalize_hrefs(str(output_path))
        catalog.make_all_asset_hrefs_relative()
        # build the default I/O object per call rather than once at import time
        if stac_io is None:
            stac_io = FSSpecStacIO()
        catalog.save(dest_href=str(output_path), stac_io=stac_io)

        return catalog_json
208
+
209
+
210
def filter_items(
    items: Generator[Item, None, None],
    cloud_cover_field: str = "eo:cloud_cover",
    max_cloud_cover: float = 100.0,
) -> Generator[Item, None, None]:
    """
    Yield only items whose cloud cover does not exceed ``max_cloud_cover``.

    Items without the cloud cover property, or with the property set to null,
    are treated as having a cloud cover of 0.0.

    Currently only cloud cover is supported. This can and should be generalized
    to an arbitrary filter field and value, which would be defined in the
    corresponding configs in search/config.py and passed down to the individual
    search approaches via said config and this function.
    """
    for item in items:
        cloud_cover = item.properties.get(cloud_cover_field)
        # an explicit null cannot be compared with a number, handle it like a
        # missing property
        if cloud_cover is None:
            cloud_cover = 0.0
        if cloud_cover <= max_cloud_cover:
            yield item
@@ -0,0 +1,42 @@
1
+ from typing import Optional
2
+
3
+ from mapchete.path import MPath, MPathLike
4
+ from pydantic import BaseModel
5
+
6
+
7
# Settings model for STAC search; all fields have defaults.
class StacSearchConfig(BaseModel):
    # upper cloud cover limit; the default of 100.0 presumably means no filtering — TODO confirm
    max_cloud_cover: float = 100.0
    # NOTE(review): chunking and paging parameters; exact semantics are not visible here
    catalog_chunk_threshold: int = 10_000
    catalog_chunk_zoom: int = 5
    catalog_pagesize: int = 100
    # presumably a buffer applied to item footprints; units not visible here — TODO confirm
    footprint_buffer: float = 0
13
+
14
+
15
# Settings model for static STAC catalogs.
class StacStaticConfig(BaseModel):
    # upper cloud cover limit; the default of 100.0 presumably means no filtering — TODO confirm
    max_cloud_cover: float = 100.0
17
+
18
+
19
class UTMSearchConfig(BaseModel):
    max_cloud_cover: float = 100.0

    # collection id and STAC collection JSON location per product type
    sinergise_aws_collections: dict = {
        "S2_L2A": {
            "id": "sentinel-s2-l2a",
            "path": MPath(
                "https://sentinel-s2-l2a-stac.s3.amazonaws.com/sentinel-s2-l2a.json"
            ),
        },
        "S2_L1C": {
            "id": "sentinel-s2-l1c",
            "path": MPath(
                "https://sentinel-s2-l1c-stac.s3.amazonaws.com/sentinel-s2-l1c.json"
            ),
        },
        "S1_GRD": {
            "id": "sentinel-s1-l1c",
            "path": MPath(
                "https://sentinel-s1-l1c-stac.s3.amazonaws.com/sentinel-s1-l1c.json"
            ),
        },
    }
    search_index: Optional[MPathLike] = None