mapchete-eo 2025.10.0__py2.py3-none-any.whl → 2025.11.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. mapchete_eo/__init__.py +1 -1
  2. mapchete_eo/array/convert.py +7 -1
  3. mapchete_eo/base.py +123 -55
  4. mapchete_eo/cli/options_arguments.py +11 -27
  5. mapchete_eo/cli/s2_brdf.py +1 -1
  6. mapchete_eo/cli/s2_cat_results.py +4 -20
  7. mapchete_eo/cli/s2_find_broken_products.py +4 -20
  8. mapchete_eo/cli/s2_jp2_static_catalog.py +2 -2
  9. mapchete_eo/cli/static_catalog.py +4 -45
  10. mapchete_eo/eostac.py +1 -1
  11. mapchete_eo/io/assets.py +7 -7
  12. mapchete_eo/io/items.py +37 -22
  13. mapchete_eo/io/levelled_cubes.py +66 -35
  14. mapchete_eo/io/path.py +19 -8
  15. mapchete_eo/io/products.py +37 -27
  16. mapchete_eo/platforms/sentinel2/__init__.py +1 -1
  17. mapchete_eo/platforms/sentinel2/_mapper_registry.py +89 -0
  18. mapchete_eo/platforms/sentinel2/brdf/correction.py +1 -1
  19. mapchete_eo/platforms/sentinel2/brdf/hls.py +1 -1
  20. mapchete_eo/platforms/sentinel2/brdf/models.py +1 -1
  21. mapchete_eo/platforms/sentinel2/brdf/protocols.py +1 -1
  22. mapchete_eo/platforms/sentinel2/brdf/ross_thick.py +1 -1
  23. mapchete_eo/platforms/sentinel2/brdf/sun_angle_arrays.py +1 -1
  24. mapchete_eo/platforms/sentinel2/config.py +73 -13
  25. mapchete_eo/platforms/sentinel2/driver.py +0 -39
  26. mapchete_eo/platforms/sentinel2/metadata_parser/__init__.py +6 -0
  27. mapchete_eo/platforms/sentinel2/{path_mappers → metadata_parser}/base.py +1 -1
  28. mapchete_eo/platforms/sentinel2/{path_mappers/metadata_xml.py → metadata_parser/default_path_mapper.py} +2 -2
  29. mapchete_eo/platforms/sentinel2/metadata_parser/models.py +78 -0
  30. mapchete_eo/platforms/sentinel2/{metadata_parser.py → metadata_parser/s2metadata.py} +51 -146
  31. mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py +57 -0
  32. mapchete_eo/platforms/sentinel2/preconfigured_sources/guessers.py +108 -0
  33. mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py +171 -0
  34. mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py +217 -0
  35. mapchete_eo/platforms/sentinel2/preprocessing_tasks.py +22 -1
  36. mapchete_eo/platforms/sentinel2/processing_baseline.py +3 -0
  37. mapchete_eo/platforms/sentinel2/product.py +88 -23
  38. mapchete_eo/platforms/sentinel2/source.py +114 -0
  39. mapchete_eo/platforms/sentinel2/types.py +5 -0
  40. mapchete_eo/processes/merge_rasters.py +7 -3
  41. mapchete_eo/product.py +14 -9
  42. mapchete_eo/protocols.py +5 -0
  43. mapchete_eo/search/__init__.py +3 -3
  44. mapchete_eo/search/base.py +126 -100
  45. mapchete_eo/search/config.py +25 -4
  46. mapchete_eo/search/s2_mgrs.py +8 -9
  47. mapchete_eo/search/stac_search.py +111 -75
  48. mapchete_eo/search/stac_static.py +63 -94
  49. mapchete_eo/search/utm_search.py +39 -48
  50. mapchete_eo/settings.py +1 -0
  51. mapchete_eo/sort.py +16 -2
  52. mapchete_eo/source.py +107 -0
  53. {mapchete_eo-2025.10.0.dist-info → mapchete_eo-2025.11.0.dist-info}/METADATA +2 -1
  54. mapchete_eo-2025.11.0.dist-info/RECORD +89 -0
  55. {mapchete_eo-2025.10.0.dist-info → mapchete_eo-2025.11.0.dist-info}/entry_points.txt +1 -1
  56. mapchete_eo/archives/__init__.py +0 -0
  57. mapchete_eo/archives/base.py +0 -65
  58. mapchete_eo/geometry.py +0 -271
  59. mapchete_eo/known_catalogs.py +0 -42
  60. mapchete_eo/platforms/sentinel2/archives.py +0 -190
  61. mapchete_eo/platforms/sentinel2/path_mappers/__init__.py +0 -29
  62. mapchete_eo/platforms/sentinel2/path_mappers/earthsearch.py +0 -34
  63. mapchete_eo/platforms/sentinel2/path_mappers/sinergise.py +0 -105
  64. mapchete_eo-2025.10.0.dist-info/RECORD +0 -88
  65. {mapchete_eo-2025.10.0.dist-info → mapchete_eo-2025.11.0.dist-info}/WHEEL +0 -0
  66. {mapchete_eo-2025.10.0.dist-info → mapchete_eo-2025.11.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,217 @@
1
+ from mapchete.path import MPath, MPathLike
2
+
3
+ from mapchete_eo.platforms.sentinel2.metadata_parser.base import S2MetadataPathMapper
4
+ from mapchete_eo.platforms.sentinel2.processing_baseline import ProcessingBaseline
5
+ from mapchete_eo.platforms.sentinel2.types import (
6
+ BandQI,
7
+ L2ABand,
8
+ ProductQI,
9
+ ProductQIMaskResolution,
10
+ )
11
+
12
+
13
class SinergisePathMapper(S2MetadataPathMapper):
    """
    Return true paths of product quality assets from the Sinergise S2 bucket.

    e.g.:
    B01 detector footprints: s3://sentinel-s2-l2a/tiles/51/K/XR/2020/7/31/0/qi/MSK_DETFOO_B01.gml
    Cloud masks: s3://sentinel-s2-l2a/tiles/51/K/XR/2020/7/31/0/qi/MSK_CLOUDS_B00.gml

    newer products however:
    B01 detector footprints: s3://sentinel-s2-l2a/tiles/51/K/XR/2022/6/6/0/qi/DETFOO_B01.jp2
    no vector cloudmasks available anymore
    """

    # QI file name templates used when the processing baseline is below 04.00
    _PRE_0400_MASK_PATHS = {
        ProductQI.classification: "MSK_CLOUDS_B00.gml",
        ProductQI.cloud_probability: "CLD_{resolution}.jp2",  # are they really there?
        ProductQI.snow_probability: "SNW_{resolution}.jp2",  # are they really there?
        BandQI.detector_footprints: "MSK_DETFOO_{band_identifier}.gml",
        BandQI.technical_quality: "MSK_TECQUA_{band_identifier}.gml",
    }
    # QI file name templates used for processing baseline 04.00 and above
    _POST_0400_MASK_PATHS = {
        ProductQI.classification: "CLASSI_B00.jp2",
        ProductQI.cloud_probability: "CLD_{resolution}.jp2",
        ProductQI.snow_probability: "SNW_{resolution}.jp2",
        BandQI.detector_footprints: "DETFOO_{band_identifier}.jp2",
        BandQI.technical_quality: "QUALIT_{band_identifier}.jp2",
    }

    def __init__(
        self,
        url: MPathLike,
        bucket: str = "sentinel-s2-l2a",
        protocol: str = "s3",
        baseline_version: str = "04.00",
        **kwargs,
    ):
        """
        Derive the bucket key of a product from the location of its metadata file.

        Args:
            url: Path of a file inside the product directory; its parent
                directory is used to build the path of ``tileInfo.json``.
            bucket: Bucket name used as host part of the generated mask paths.
            protocol: URL scheme used for the generated mask paths.
            baseline_version: Processing baseline version of the product, used
                to select the matching set of mask file names.
            **kwargs: Accepted but not used by this mapper.
        """
        url = MPath.from_inp(url)
        tileinfo_path = url.parent / "tileInfo.json"
        # keep the eight path elements preceding the file name as the product key,
        # e.g. tiles/51/K/XR/2020/7/31/0
        self._path = MPath(
            "/".join(tileinfo_path.elements[-9:-1]), **tileinfo_path._kwargs
        )
        # in the example key above these are "51", "K" and "XR"
        self._utm_zone, self._latitude_band, self._grid_square = self._path.split("/")[
            1:-4
        ]
        self._baseurl = bucket
        self._protocol = protocol
        self.processing_baseline = ProcessingBaseline.from_version(baseline_version)

    def _baseline_mask_paths(self) -> dict:
        """Return the mask file name templates matching the processing baseline."""
        if self.processing_baseline.version < "04.00":
            return self._PRE_0400_MASK_PATHS
        return self._POST_0400_MASK_PATHS

    def product_qi_mask(
        self,
        qi_mask: ProductQI,
        resolution: ProductQIMaskResolution = ProductQIMaskResolution["60m"],
    ) -> MPath:
        """
        Determine product QI mask according to Sinergise bucket schema.

        Args:
            qi_mask: Product level quality mask to look up.
            resolution: Resolution whose name is inserted into file names
                containing a ``{resolution}`` placeholder.

        Returns:
            Path of the mask file below the product's ``qi`` directory.

        Raises:
            KeyError: If no file name is registered for ``qi_mask``.
        """
        mask_path = self._baseline_mask_paths()[qi_mask]
        key = f"{self._path}/qi/{mask_path.format(resolution=resolution.name)}"
        return MPath.from_inp(f"{self._protocol}://{self._baseurl}/{key}")

    def classification_mask(self) -> MPath:
        """Return the path of the classification mask."""
        return self.product_qi_mask(ProductQI.classification)

    def cloud_probability_mask(
        self, resolution: ProductQIMaskResolution = ProductQIMaskResolution["60m"]
    ) -> MPath:
        """Return the path of the cloud probability mask at the given resolution."""
        return self.product_qi_mask(ProductQI.cloud_probability, resolution=resolution)

    def snow_probability_mask(
        self, resolution: ProductQIMaskResolution = ProductQIMaskResolution["60m"]
    ) -> MPath:
        """Return the path of the snow probability mask at the given resolution."""
        return self.product_qi_mask(ProductQI.snow_probability, resolution=resolution)

    def band_qi_mask(self, qi_mask: BandQI, band: L2ABand) -> MPath:
        """
        Determine band QI mask according to Sinergise bucket schema.

        Args:
            qi_mask: Band level quality mask to look up.
            band: Band whose name is inserted into the mask file name.

        Returns:
            Path of the mask file below the product's ``qi`` directory.

        Raises:
            DeprecationWarning: If no file name is registered for ``qi_mask``.
        """
        try:
            mask_path = self._baseline_mask_paths()[qi_mask]
        except KeyError as exc:
            # chain explicitly so the failed lookup is reported as the cause
            raise DeprecationWarning(
                f"'{qi_mask.name}' quality mask not found in this product"
            ) from exc
        key = f"{self._path}/qi/{mask_path.format(band_identifier=band.name)}"
        return MPath.from_inp(f"{self._protocol}://{self._baseurl}/{key}")

    def technical_quality_mask(self, band: L2ABand) -> MPath:
        """Return the path of the technical quality mask of a band."""
        return self.band_qi_mask(BandQI.technical_quality, band)

    def detector_footprints(self, band: L2ABand) -> MPath:
        """Return the path of the detector footprints mask of a band."""
        return self.band_qi_mask(BandQI.detector_footprints, band)
106
+
107
+
108
class EarthSearchPathMapper(SinergisePathMapper):
    """
    Map metadata masks of EarthSearch products to another bucket.

    The COG archive maintained by E84 and covered by EarthSearch does not hold
    additional data such as the GML files, so the mask paths are generated for
    an alternative bucket using the schema inherited from SinergisePathMapper.

    The product key is not derived from the metadata URL but read from the
    "path" entry of the ``tileinfo_metadata.json`` file located next to the
    metadata XML.
    """

    def __init__(
        self,
        metadata_xml: MPath,
        alternative_metadata_baseurl: str = "sentinel-s2-l2a",
        protocol: str = "s3",
        baseline_version: str = "04.00",
        **kwargs,
    ):
        """
        Args:
            metadata_xml: Path of the product's metadata XML file.
            alternative_metadata_baseurl: Bucket name used as host part of the
                generated mask paths.
            protocol: URL scheme used for the generated mask paths.
            baseline_version: Processing baseline version of the product.
            **kwargs: Accepted but not used by this mapper.
        """
        self._baseurl = alternative_metadata_baseurl
        self._protocol = protocol

        product_dir = metadata_xml.parent
        tileinfo = (product_dir / "tileinfo_metadata.json").read_json()
        self._path = tileinfo["path"]

        utm_zone, latitude_band, grid_square = product_dir.elements[-6:-3]
        self._utm_zone = utm_zone
        self._latitude_band = latitude_band
        self._grid_square = grid_square

        self.processing_baseline = ProcessingBaseline.from_version(baseline_version)
136
+
137
+
138
class EarthSearchC1PathMapper(SinergisePathMapper):
    """
    Path mapper for the newer EarthSearch C1 collection.

    The C1 collection has cloud and snow probability masks as assets, so only
    the remaining masks need to be mapped.

    The product key is read from the "path" entry of the ``tileInfo.json`` file
    located next to the metadata XML.
    """

    def __init__(
        self,
        metadata_xml: MPath,
        alternative_metadata_baseurl: str = "sentinel-s2-l2a",
        protocol: str = "s3",
        baseline_version: str = "04.00",
        **kwargs,
    ):
        """
        Args:
            metadata_xml: Path of the product's metadata XML file.
            alternative_metadata_baseurl: Bucket name used as host part of the
                generated mask paths.
            protocol: URL scheme used for the generated mask paths.
            baseline_version: Processing baseline version of the product.
            **kwargs: Accepted but not used by this mapper.
        """
        self._baseurl = alternative_metadata_baseurl
        self._protocol = protocol

        product_dir = metadata_xml.parent
        tileinfo = (product_dir / "tileInfo.json").read_json()
        self._path = tileinfo["path"]

        utm_zone, latitude_band, grid_square = product_dir.elements[-6:-3]
        self._utm_zone = utm_zone
        self._latitude_band = latitude_band
        self._grid_square = grid_square

        self.processing_baseline = ProcessingBaseline.from_version(baseline_version)
158
+
159
+
160
class CDSEPathMapper(S2MetadataPathMapper):
    """
    Return paths of product quality masks located next to the metadata file.

    All masks are expected in the ``QI_DATA`` directory below the directory
    containing the metadata file this mapper was initialized with.
    """

    # mask file name templates; the same set is used for every processing baseline
    _MASK_FILENAMES = {
        ProductQI.classification: "MSK_CLASSI_B00.jp2",
        ProductQI.cloud_probability: "MSK_CLDPRB_{resolution}.jp2",
        ProductQI.snow_probability: "MSK_SNWPRB_{resolution}.jp2",
        BandQI.detector_footprints: "MSK_DETFOO_{band_identifier}.jp2",
        BandQI.technical_quality: "MSK_QUALIT_{band_identifier}.jp2",
    }

    def __init__(
        self,
        url: MPathLike,
        baseline_version: str = "04.00",
        **kwargs,
    ):
        """
        Args:
            url: Path of the product's metadata file; its parent directory is
                used as base directory for all mask paths.
            baseline_version: Processing baseline version of the product.
            **kwargs: Accepted but not used by this mapper.
        """
        url = MPath.from_inp(url)
        self._path = url.parent
        self.processing_baseline = ProcessingBaseline.from_version(baseline_version)

    def product_qi_mask(
        self,
        qi_mask: ProductQI,
        resolution: ProductQIMaskResolution = ProductQIMaskResolution["60m"],
    ) -> MPath:
        """
        Determine product QI mask path inside the QI_DATA directory.

        Args:
            qi_mask: Product level quality mask to look up.
            resolution: Resolution whose name is inserted into file names
                containing a ``{resolution}`` placeholder.

        Returns:
            Path of the mask file below ``QI_DATA``.

        Raises:
            KeyError: If no file name is registered for ``qi_mask``.
        """
        mask_path = self._MASK_FILENAMES[qi_mask]
        key = f"QI_DATA/{mask_path.format(resolution=resolution.name)}"
        return self._path / key

    def classification_mask(self) -> MPath:
        """Return the path of the classification mask."""
        return self.product_qi_mask(ProductQI.classification)

    def cloud_probability_mask(
        self, resolution: ProductQIMaskResolution = ProductQIMaskResolution["60m"]
    ) -> MPath:
        """Return the path of the cloud probability mask at the given resolution."""
        return self.product_qi_mask(ProductQI.cloud_probability, resolution=resolution)

    def snow_probability_mask(
        self, resolution: ProductQIMaskResolution = ProductQIMaskResolution["60m"]
    ) -> MPath:
        """Return the path of the snow probability mask at the given resolution."""
        return self.product_qi_mask(ProductQI.snow_probability, resolution=resolution)

    def band_qi_mask(self, qi_mask: BandQI, band: L2ABand) -> MPath:
        """
        Determine band QI mask path inside the QI_DATA directory.

        Args:
            qi_mask: Band level quality mask to look up.
            band: Band whose name is inserted into the mask file name.

        Returns:
            Path of the mask file below ``QI_DATA``.

        Raises:
            DeprecationWarning: If no file name is registered for ``qi_mask``.
        """
        try:
            mask_path = self._MASK_FILENAMES[qi_mask]
        except KeyError as exc:
            # chain explicitly so the failed lookup is reported as the cause
            raise DeprecationWarning(
                f"'{qi_mask.name}' quality mask not found in this product"
            ) from exc
        key = f"QI_DATA/{mask_path.format(band_identifier=band.name)}"
        return self._path / key

    def technical_quality_mask(self, band: L2ABand) -> MPath:
        """Return the path of the technical quality mask of a band."""
        return self.band_qi_mask(BandQI.technical_quality, band)

    def detector_footprints(self, band: L2ABand) -> MPath:
        """Return the path of the detector footprints mask of a band."""
        return self.band_qi_mask(BandQI.detector_footprints, band)
@@ -4,9 +4,12 @@ from typing import Optional, Union
4
4
  import pystac
5
5
 
6
6
  from mapchete_eo.exceptions import CorruptedProductMetadata
7
+ from mapchete_eo.io.items import get_item_property
7
8
  from mapchete_eo.platforms.sentinel2.config import CacheConfig
8
9
  from mapchete_eo.platforms.sentinel2.product import S2Product
10
+ from mapchete_eo.platforms.sentinel2.source import Sentinel2Source
9
11
  from mapchete_eo.product import add_to_blacklist
12
+ from mapchete_eo.settings import mapchete_eo_settings
10
13
 
11
14
  logger = logging.getLogger(__name__)
12
15
 
@@ -16,9 +19,27 @@ def parse_s2_product(
16
19
  cache_config: Optional[CacheConfig] = None,
17
20
  cache_all: bool = False,
18
21
  ) -> Union[S2Product, CorruptedProductMetadata]:
22
+ # use mapper from source if applicable
23
+ source: Union[Sentinel2Source, None] = item.properties.pop(
24
+ "mapchete_eo:source", None
25
+ )
19
26
  try:
20
27
  s2product = S2Product.from_stac_item(
21
- item, cache_config=cache_config, cache_all=cache_all
28
+ item,
29
+ cache_config=cache_config,
30
+ cache_all=cache_all,
31
+ metadata_mapper=None if source is None else source.get_s2metadata_mapper(),
32
+ item_modifier_funcs=None if source is None else source.item_modifier_funcs,
33
+ lazy_load_item=mapchete_eo_settings.lazy_load_stac_items,
34
+ item_property_cache={
35
+ key: get_item_property(item, key)
36
+ for key in [
37
+ "datetime",
38
+ "eo:cloud_cover",
39
+ "id",
40
+ "s2:datastrip_id",
41
+ ]
42
+ },
22
43
  )
23
44
  except CorruptedProductMetadata as exc:
24
45
  add_to_blacklist(item.get_self_href())
@@ -152,6 +152,9 @@ class ProcessingBaseline:
152
152
  self.band_mask_types = self.item_mapping.band_mask_types
153
153
  self.band_mask_extension = self.item_mapping.band_mask_extension
154
154
 
155
+ def __repr__(self) -> str:
156
+ return f"<ProcessingBaseline version={self.version}>"
157
+
155
158
  @staticmethod
156
159
  def from_version(version: Union[BaselineVersion, str]) -> "ProcessingBaseline":
157
160
  if isinstance(version, BaselineVersion):
@@ -1,22 +1,23 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- from typing import Dict, List, Optional, Union
4
+ from typing import Any, Callable, Dict, List, Optional, Union
5
5
 
6
6
  import numpy as np
7
7
  import numpy.ma as ma
8
- import pystac
9
8
  from mapchete.io.raster import ReferencedRaster, read_raster_window, resample_from_array
10
- from mapchete.geometry import reproject_geometry
9
+ from mapchete.geometry import reproject_geometry, buffer_antimeridian_safe
11
10
  from mapchete.path import MPath
12
11
  from mapchete.protocols import GridProtocol
13
12
  from mapchete.types import Bounds, Grid, NodataVals
13
+ from pystac import Item
14
14
  from rasterio.enums import Resampling
15
15
  from rasterio.features import rasterize
16
16
  from shapely.geometry import shape
17
17
 
18
18
 
19
19
  from mapchete_eo.array.buffer import buffer_array
20
+ from mapchete_eo.io.items import get_item_property
20
21
  from mapchete_eo.platforms.sentinel2.brdf.config import BRDFModels
21
22
  from mapchete_eo.platforms.sentinel2.brdf.correction import apply_correction
22
23
  from mapchete_eo.exceptions import (
@@ -27,7 +28,6 @@ from mapchete_eo.exceptions import (
27
28
  EmptyFootprintException,
28
29
  EmptyProductException,
29
30
  )
30
- from mapchete_eo.geometry import buffer_antimeridian_safe
31
31
  from mapchete_eo.io.assets import get_assets, read_mask_as_raster
32
32
  from mapchete_eo.io.path import asset_mpath, get_product_cache_path
33
33
  from mapchete_eo.io.profiles import COGDeflateProfile
@@ -41,7 +41,7 @@ from mapchete_eo.platforms.sentinel2.config import (
41
41
  CacheConfig,
42
42
  MaskConfig,
43
43
  )
44
- from mapchete_eo.platforms.sentinel2.metadata_parser import S2Metadata
44
+ from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata
45
45
  from mapchete_eo.platforms.sentinel2.types import (
46
46
  CloudType,
47
47
  L2ABand,
@@ -56,11 +56,11 @@ logger = logging.getLogger(__name__)
56
56
 
57
57
 
58
58
  class Cache:
59
- item: pystac.Item
59
+ item: Item
60
60
  config: CacheConfig
61
61
  path: MPath
62
62
 
63
- def __init__(self, item: pystac.Item, config: CacheConfig):
63
+ def __init__(self, item: Item, config: CacheConfig):
64
64
  self.item = item
65
65
  self.config = config
66
66
  # TODO: maybe move this function here
@@ -143,21 +143,33 @@ class Cache:
143
143
 
144
144
 
145
145
  class S2Product(EOProduct, EOProductProtocol):
146
- item_dict: dict
146
+ _item_dict: Optional[dict] = None
147
147
  cache: Optional[Cache] = None
148
148
  _scl_cache: Dict[GridProtocol, np.ndarray]
149
+ _item_property_cache: Dict[str, Any]
149
150
 
150
151
  def __init__(
151
152
  self,
152
- item: pystac.Item,
153
+ item: Item,
153
154
  metadata: Optional[S2Metadata] = None,
154
155
  cache_config: Optional[CacheConfig] = None,
156
+ metadata_mapper: Optional[Callable[[Item], S2Metadata]] = None,
157
+ item_modifier_funcs: Optional[List[Callable[[Item], Item]]] = None,
158
+ lazy_load_item: bool = False,
159
+ item_property_cache: Optional[Dict[str, Any]] = None,
155
160
  ):
156
- self.item_dict = item.to_dict()
161
+ if lazy_load_item:
162
+ self._item_dict = None
163
+ else:
164
+ self._item_dict = item.to_dict()
165
+ self.item_href = item.self_href
157
166
  self.id = item.id
158
167
 
159
168
  self._metadata = metadata
169
+ self._metadata_mapper = metadata_mapper
170
+ self._item_modifier_funcs = item_modifier_funcs
160
171
  self._scl_cache = dict()
172
+ self._item_property_cache = item_property_cache or dict()
161
173
  self.cache = Cache(item, cache_config) if cache_config else None
162
174
 
163
175
  self.__geo_interface__ = item.geometry
@@ -167,12 +179,12 @@ class S2Product(EOProduct, EOProductProtocol):
167
179
  @classmethod
168
180
  def from_stac_item(
169
181
  self,
170
- item: pystac.Item,
182
+ item: Item,
171
183
  cache_config: Optional[CacheConfig] = None,
172
184
  cache_all: bool = False,
173
185
  **kwargs,
174
186
  ) -> S2Product:
175
- s2product = S2Product(item, cache_config=cache_config)
187
+ s2product = S2Product(item, cache_config=cache_config, **kwargs)
176
188
 
177
189
  if cache_all:
178
190
  # cache assets if configured
@@ -183,22 +195,37 @@ class S2Product(EOProduct, EOProductProtocol):
183
195
 
184
196
  return s2product
185
197
 
198
+ @property
199
+ def item(self) -> Item:
200
+ if not self._item:
201
+ if self._item_dict:
202
+ self._item = Item.from_dict(self._item_dict)
203
+ else:
204
+ item = Item.from_file(self.item_href)
205
+ for modifier in self._item_modifier_funcs or []:
206
+ item = modifier(item)
207
+ self._item = item
208
+ return self._item
209
+
186
210
  @property
187
211
  def metadata(self) -> S2Metadata:
188
212
  if not self._metadata:
189
- self._metadata = S2Metadata.from_stac_item(
190
- pystac.Item.from_dict(self.item_dict)
191
- )
213
+ if self._metadata_mapper:
214
+ self._metadata = self._metadata_mapper(self.item)
215
+ else:
216
+ self._metadata = S2Metadata.from_stac_item(self.item)
192
217
  return self._metadata
193
218
 
194
219
  def __repr__(self):
195
220
  return f"<S2Product product_id={self.id}>"
196
221
 
197
222
  def clear_cached_data(self):
198
- logger.debug("clear S2Product caches")
199
223
  if self._metadata is not None:
200
224
  self._metadata.clear_cached_data()
201
225
  self._metadata = None
226
+ if self._item is not None:
227
+ self._item = None
228
+ self._item_property_cache = dict()
202
229
  self._scl_cache = dict()
203
230
 
204
231
  def read_np_array(
@@ -215,7 +242,7 @@ class S2Product(EOProduct, EOProductProtocol):
215
242
  mask_config: MaskConfig = MaskConfig(),
216
243
  brdf_config: Optional[BRDFConfig] = None,
217
244
  fill_value: int = 0,
218
- target_mask: Optional[np.ndarray] = None,
245
+ read_mask: Optional[np.ndarray] = None,
219
246
  **kwargs,
220
247
  ) -> ma.MaskedArray:
221
248
  assets = assets or []
@@ -228,7 +255,9 @@ class S2Product(EOProduct, EOProductProtocol):
228
255
  count = len(assets)
229
256
  if isinstance(grid, Resolution):
230
257
  grid = self.metadata.grid(grid)
231
- mask = self.get_mask(grid, mask_config, target_mask=target_mask).data
258
+ mask = self.get_mask(
259
+ grid, mask_config, target_mask=None if read_mask is None else ~read_mask
260
+ ).data
232
261
  if nodatavals is None:
233
262
  nodatavals = fill_value
234
263
  elif fill_value is None and nodatavals is not None:
@@ -361,7 +390,23 @@ class S2Product(EOProduct, EOProductProtocol):
361
390
  cached_read: bool = False,
362
391
  ) -> ReferencedRaster:
363
392
  """Return cloud probability mask."""
364
- logger.debug("read cloud probability mask for %s", str(self))
393
+ if "cloud" in self.item.assets:
394
+ logger.debug("read cloud probability mask for %s from asset", str(self))
395
+ return read_mask_as_raster(
396
+ path=asset_mpath(item=self.item, asset="cloud"),
397
+ dst_grid=(
398
+ self.metadata.grid(grid)
399
+ if isinstance(grid, Resolution)
400
+ else Grid.from_obj(grid)
401
+ ),
402
+ resampling=resampling,
403
+ rasterize_value_func=lambda feature: True,
404
+ masked=False,
405
+ cached_read=cached_read,
406
+ )
407
+ logger.debug(
408
+ "read cloud probability mask for %s from metadata archive", str(self)
409
+ )
365
410
  return self.metadata.cloud_probability(
366
411
  dst_grid=grid,
367
412
  resampling=resampling,
@@ -377,7 +422,23 @@ class S2Product(EOProduct, EOProductProtocol):
377
422
  cached_read: bool = False,
378
423
  ) -> ReferencedRaster:
379
424
  """Return classification snow and ice mask."""
380
- logger.debug("read snow probability mask for %s", str(self))
425
+ if "snow" in self.item.assets:
426
+ logger.debug("read snow probability mask for %s from asset", str(self))
427
+ return read_mask_as_raster(
428
+ path=asset_mpath(item=self.item, asset="cloud"),
429
+ dst_grid=(
430
+ self.metadata.grid(grid)
431
+ if isinstance(grid, Resolution)
432
+ else Grid.from_obj(grid)
433
+ ),
434
+ resampling=resampling,
435
+ rasterize_value_func=lambda feature: True,
436
+ masked=False,
437
+ cached_read=cached_read,
438
+ )
439
+ logger.debug(
440
+ "read snow probability mask for %s from metadata archive", str(self)
441
+ )
381
442
  return self.metadata.snow_probability(
382
443
  dst_grid=grid,
383
444
  resampling=resampling,
@@ -464,13 +525,12 @@ class S2Product(EOProduct, EOProductProtocol):
464
525
  if isinstance(grid, Resolution)
465
526
  else Grid.from_obj(grid)
466
527
  )
467
-
468
528
  if target_mask is None:
469
529
  target_mask = np.zeros(shape=grid.shape, dtype=bool)
470
530
  else:
471
531
  if target_mask.shape != grid.shape:
472
532
  raise ValueError("a target mask must have the same shape as the grid")
473
- logger.debug("got custom target mask to start with: %s", target_mask)
533
+ logger.debug("got custom target mask to start with: %s", target_mask.shape)
474
534
 
475
535
  def _check_full(arr):
476
536
  # ATTENTION: target_mask and out have to be combined *after* mask was buffered!
@@ -569,6 +629,11 @@ class S2Product(EOProduct, EOProductProtocol):
569
629
  bounds=grid.bounds,
570
630
  )
571
631
 
632
+ def get_property(self, name: str) -> Any:
633
+ if name not in self._item_property_cache:
634
+ self._item_property_cache[name] = get_item_property(self.item, name)
635
+ return self._item_property_cache[name]
636
+
572
637
  def _apply_sentinel2_bandpass_adjustment(
573
638
  self, uncorrected: ma.MaskedArray, assets: List[str], computing_dtype=np.float32
574
639
  ) -> ma.MaskedArray:
@@ -662,7 +727,7 @@ class S2Product(EOProduct, EOProductProtocol):
662
727
  return out_arr
663
728
 
664
729
 
665
- def asset_name_to_l2a_band(item: pystac.Item, asset_name: str) -> L2ABand:
730
+ def asset_name_to_l2a_band(item: Item, asset_name: str) -> L2ABand:
666
731
  asset = item.assets[asset_name]
667
732
  asset_path = MPath(asset.href)
668
733
  band_name = asset_path.name.split(".")[0]
@@ -0,0 +1,114 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, List, Callable, Dict, Any, Union
4
+ import warnings
5
+
6
+ from pydantic import model_validator
7
+ from pystac import Item
8
+
9
+ from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import S2Metadata
10
+ from mapchete_eo.source import Source
11
+ from mapchete_eo.platforms.sentinel2.preconfigured_sources import (
12
+ DEPRECATED_ARCHIVES,
13
+ KNOWN_SOURCES,
14
+ )
15
+ from mapchete_eo.platforms.sentinel2.types import (
16
+ DataArchive,
17
+ MetadataArchive,
18
+ )
19
+ from mapchete_eo.platforms.sentinel2._mapper_registry import MAPPER_REGISTRIES
20
+
21
+
22
def known_collection_to_url(collection: str) -> str:
    """
    Translate the short name of a known source into its collection value.

    Names not registered in KNOWN_SOURCES are returned unchanged.
    """
    if collection not in KNOWN_SOURCES:
        return collection
    known_source = KNOWN_SOURCES[collection]
    return known_source["collection"]
26
+
27
+
28
class Sentinel2Source(Source):
    """All information required to consume Sentinel-2 products."""

    # extends base model with those properties
    data_archive: Optional[DataArchive] = None
    metadata_archive: MetadataArchive = "roda"

    @property
    def item_modifier_funcs(self) -> List[Callable]:
        """Return the ID mapper (if there is one) followed by all STAC metadata mappers."""
        return [
            func
            for func in (self.get_id_mapper(), *self.get_stac_metadata_mappers())
            if func is not None
        ]

    @model_validator(mode="before")
    def determine_data_source(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Handles short names of sources.

        A plain string is interpreted as the collection. If the collection is
        the name of a known source or of a deprecated archive, the registered
        settings are merged into the values; a deprecated archive additionally
        triggers a DeprecationWarning.
        """
        if isinstance(values, str):
            values = dict(collection=values)
        elif isinstance(values, dict):
            # work on a shallow copy so the caller's dictionary is not modified
            values = dict(values)
        collection = values.get("collection", None)
        if collection in KNOWN_SOURCES:
            values.update(KNOWN_SOURCES[collection])
        elif collection in DEPRECATED_ARCHIVES:
            warnings.warn(
                f"deprecated archive '{collection}' found",
                category=DeprecationWarning,
                stacklevel=2,
            )
            values.update(DEPRECATED_ARCHIVES[collection])
        return values

    @model_validator(mode="after")
    def verify_mappers(self) -> Sentinel2Source:
        """Fail validation early if a required mapper is not registered."""
        # make sure all required mappers are registered
        self.get_id_mapper()
        self.get_stac_metadata_mappers()
        self.get_s2metadata_mapper()
        return self

    def get_id_mapper(self) -> Union[Callable[[Item], Item], None]:
        """Find the registered ID mapper for this collection.

        Returns None for static catalogs.

        Raises:
            ValueError: If no ID mapper is registered for the collection.
        """
        if self.catalog_type == "static":
            return None
        for key, mapper in MAPPER_REGISTRIES["ID"].items():
            if self.collection == known_collection_to_url(key):
                return mapper
        raise ValueError(f"no ID mapper for {self.collection} found")

    def get_stac_metadata_mappers(self) -> List[Callable[[Item], Item]]:
        """Find mapper function.

        A mapper function must be provided if a custom data_archive was configured.

        Returns an empty list for static catalogs.

        Raises:
            ValueError: If a data_archive is configured but no mapper matches.
        """
        mappers: List[Callable] = []
        if self.catalog_type == "static":
            return mappers
        registry = MAPPER_REGISTRIES["STAC metadata"]
        for key in registry:
            if isinstance(key, tuple):
                # tuple keys bind a mapper to a (collection, data archive) pair
                collection, data_archive = key
                if (
                    self.collection == known_collection_to_url(collection)
                    and data_archive == self.data_archive
                ):
                    mappers.append(registry[key])
            elif self.collection == known_collection_to_url(key):
                mappers.append(registry[key])
        if mappers or self.data_archive is None:
            return mappers
        raise ValueError(
            f"no STAC metadata mapper from {self.collection} to {self.data_archive} found"
        )

    def get_s2metadata_mapper(self) -> Union[Callable[[Item], S2Metadata], None]:
        """Find the registered S2Metadata mapper for collection and metadata archive.

        Returns None for static catalogs or if no metadata archive is set.

        Raises:
            ValueError: If no matching mapper is registered.
        """
        if self.catalog_type == "static" or self.metadata_archive is None:
            return None
        registry = MAPPER_REGISTRIES["S2Metadata"]
        for key in registry:
            collection, metadata_archive = key
            if (
                self.collection == known_collection_to_url(collection)
                and metadata_archive == self.metadata_archive
            ):
                return registry[key]
        raise ValueError(
            f"no S2Metadata mapper from {self.collection} to {self.metadata_archive} found"
        )
@@ -1,4 +1,5 @@
1
1
  from enum import Enum
2
+ from typing import Literal
2
3
 
3
4
  Resolution = Enum(
4
5
  "Resolution",
@@ -107,3 +108,7 @@ class SceneClassification(int, Enum):
107
108
  cloud_high_probability = 9
108
109
  thin_cirrus = 10
109
110
  snow = 11
111
+
112
+
113
+ DataArchive = Literal["AWSCOG", "AWSJP2"]
114
+ MetadataArchive = Literal["roda", "CDSE"]
@@ -181,15 +181,19 @@ def gradient_merge(
181
181
  # footprint coverage)
182
182
  # set 1 to 0:
183
183
  gradient_1band[gradient_1band == 1] = 0
184
- logger.debug(f"gradient_1band: {gradient_1band}")
184
+ logger.debug(
185
+ f"gradient_1band; min: {np.min(gradient_1band)}, max: {np.max(gradient_1band)}"
186
+ )
185
187
 
186
188
  # extrude array to match number of raster bands
187
189
  gradient_8bit = np.stack([gradient_1band for _ in range(raster.shape[0])])
188
- logger.debug(f"gradient_8bit: {gradient_8bit}")
190
+ logger.debug(
191
+ f"gradient_8bit; min: {np.min(gradient_8bit)}, max: {np.max(gradient_8bit)}"
192
+ )
189
193
 
190
194
  # scale gradient from 0 to 1
191
195
  gradient = gradient_8bit / 255
192
- logger.debug(f"gradient: {gradient}")
196
+ logger.debug(f"gradient; min: {np.min(gradient)} , max: {np.max(gradient)}")
193
197
 
194
198
  # now only apply the gradient where out and raster have values
195
199
  # otherwise pick the remaining existing value or keep a masked