mapchete_eo-2025.7.0-py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. mapchete_eo/__init__.py +1 -0
  2. mapchete_eo/archives/__init__.py +0 -0
  3. mapchete_eo/archives/base.py +65 -0
  4. mapchete_eo/array/__init__.py +0 -0
  5. mapchete_eo/array/buffer.py +16 -0
  6. mapchete_eo/array/color.py +29 -0
  7. mapchete_eo/array/convert.py +157 -0
  8. mapchete_eo/base.py +528 -0
  9. mapchete_eo/blacklist.txt +175 -0
  10. mapchete_eo/cli/__init__.py +30 -0
  11. mapchete_eo/cli/bounds.py +22 -0
  12. mapchete_eo/cli/options_arguments.py +243 -0
  13. mapchete_eo/cli/s2_brdf.py +77 -0
  14. mapchete_eo/cli/s2_cat_results.py +146 -0
  15. mapchete_eo/cli/s2_find_broken_products.py +93 -0
  16. mapchete_eo/cli/s2_jp2_static_catalog.py +166 -0
  17. mapchete_eo/cli/s2_mask.py +71 -0
  18. mapchete_eo/cli/s2_mgrs.py +45 -0
  19. mapchete_eo/cli/s2_rgb.py +114 -0
  20. mapchete_eo/cli/s2_verify.py +129 -0
  21. mapchete_eo/cli/static_catalog.py +123 -0
  22. mapchete_eo/eostac.py +30 -0
  23. mapchete_eo/exceptions.py +87 -0
  24. mapchete_eo/geometry.py +271 -0
  25. mapchete_eo/image_operations/__init__.py +12 -0
  26. mapchete_eo/image_operations/color_correction.py +136 -0
  27. mapchete_eo/image_operations/compositing.py +247 -0
  28. mapchete_eo/image_operations/dtype_scale.py +43 -0
  29. mapchete_eo/image_operations/fillnodata.py +130 -0
  30. mapchete_eo/image_operations/filters.py +319 -0
  31. mapchete_eo/image_operations/linear_normalization.py +81 -0
  32. mapchete_eo/image_operations/sigmoidal.py +114 -0
  33. mapchete_eo/io/__init__.py +37 -0
  34. mapchete_eo/io/assets.py +492 -0
  35. mapchete_eo/io/items.py +147 -0
  36. mapchete_eo/io/levelled_cubes.py +228 -0
  37. mapchete_eo/io/path.py +144 -0
  38. mapchete_eo/io/products.py +413 -0
  39. mapchete_eo/io/profiles.py +45 -0
  40. mapchete_eo/known_catalogs.py +42 -0
  41. mapchete_eo/platforms/sentinel2/__init__.py +17 -0
  42. mapchete_eo/platforms/sentinel2/archives.py +190 -0
  43. mapchete_eo/platforms/sentinel2/bandpass_adjustment.py +104 -0
  44. mapchete_eo/platforms/sentinel2/brdf/__init__.py +8 -0
  45. mapchete_eo/platforms/sentinel2/brdf/config.py +32 -0
  46. mapchete_eo/platforms/sentinel2/brdf/correction.py +260 -0
  47. mapchete_eo/platforms/sentinel2/brdf/hls.py +251 -0
  48. mapchete_eo/platforms/sentinel2/brdf/models.py +44 -0
  49. mapchete_eo/platforms/sentinel2/brdf/protocols.py +27 -0
  50. mapchete_eo/platforms/sentinel2/brdf/ross_thick.py +136 -0
  51. mapchete_eo/platforms/sentinel2/brdf/sun_angle_arrays.py +76 -0
  52. mapchete_eo/platforms/sentinel2/config.py +181 -0
  53. mapchete_eo/platforms/sentinel2/driver.py +78 -0
  54. mapchete_eo/platforms/sentinel2/masks.py +325 -0
  55. mapchete_eo/platforms/sentinel2/metadata_parser.py +734 -0
  56. mapchete_eo/platforms/sentinel2/path_mappers/__init__.py +29 -0
  57. mapchete_eo/platforms/sentinel2/path_mappers/base.py +56 -0
  58. mapchete_eo/platforms/sentinel2/path_mappers/earthsearch.py +34 -0
  59. mapchete_eo/platforms/sentinel2/path_mappers/metadata_xml.py +135 -0
  60. mapchete_eo/platforms/sentinel2/path_mappers/sinergise.py +105 -0
  61. mapchete_eo/platforms/sentinel2/preprocessing_tasks.py +26 -0
  62. mapchete_eo/platforms/sentinel2/processing_baseline.py +160 -0
  63. mapchete_eo/platforms/sentinel2/product.py +669 -0
  64. mapchete_eo/platforms/sentinel2/types.py +109 -0
  65. mapchete_eo/processes/__init__.py +0 -0
  66. mapchete_eo/processes/config.py +51 -0
  67. mapchete_eo/processes/dtype_scale.py +112 -0
  68. mapchete_eo/processes/eo_to_xarray.py +19 -0
  69. mapchete_eo/processes/merge_rasters.py +235 -0
  70. mapchete_eo/product.py +278 -0
  71. mapchete_eo/protocols.py +56 -0
  72. mapchete_eo/search/__init__.py +14 -0
  73. mapchete_eo/search/base.py +222 -0
  74. mapchete_eo/search/config.py +42 -0
  75. mapchete_eo/search/s2_mgrs.py +314 -0
  76. mapchete_eo/search/stac_search.py +251 -0
  77. mapchete_eo/search/stac_static.py +236 -0
  78. mapchete_eo/search/utm_search.py +251 -0
  79. mapchete_eo/settings.py +24 -0
  80. mapchete_eo/sort.py +48 -0
  81. mapchete_eo/time.py +53 -0
  82. mapchete_eo/types.py +73 -0
  83. mapchete_eo-2025.7.0.dist-info/METADATA +38 -0
  84. mapchete_eo-2025.7.0.dist-info/RECORD +87 -0
  85. mapchete_eo-2025.7.0.dist-info/WHEEL +5 -0
  86. mapchete_eo-2025.7.0.dist-info/entry_points.txt +11 -0
  87. mapchete_eo-2025.7.0.dist-info/licenses/LICENSE +21 -0
mapchete_eo/io/products.py
@@ -0,0 +1,413 @@
+ from __future__ import annotations
+
+ import gc
+ import logging
+ from collections import defaultdict
+ from contextlib import contextmanager
+ from datetime import datetime
+ from typing import Any, Dict, Generator, Iterator, List, Optional, Sequence
+
+ import numpy as np
+ import numpy.ma as ma
+ import xarray as xr
+ from mapchete import Timer
+ from mapchete.config import get_hash
+ from mapchete.geometry import to_shape
+ from mapchete.protocols import GridProtocol
+ from mapchete.types import NodataVals
+ from rasterio.enums import Resampling
+ from shapely.geometry import mapping
+ from shapely.ops import unary_union
+
+ from mapchete_eo.array.convert import to_dataarray, to_masked_array
+ from mapchete_eo.exceptions import (
+     AssetKeyError,
+     CorruptedProduct,
+     CorruptedSlice,
+     EmptySliceException,
+     EmptyStackException,
+     NoSourceProducts,
+ )
+ from mapchete_eo.io.items import get_item_property
+ from mapchete_eo.protocols import EOProductProtocol
+ from mapchete_eo.sort import SortMethodConfig
+ from mapchete_eo.types import MergeMethod
+
+
+ logger = logging.getLogger(__name__)
+
+
+ def products_to_np_array(
+     products: List[EOProductProtocol],
+     assets: Optional[List[str]] = None,
+     eo_bands: Optional[List[str]] = None,
+     grid: Optional[GridProtocol] = None,
+     resampling: Resampling = Resampling.nearest,
+     nodatavals: NodataVals = None,
+     merge_products_by: Optional[str] = None,
+     merge_method: MergeMethod = MergeMethod.first,
+     sort: Optional[SortMethodConfig] = None,
+     product_read_kwargs: dict = {},
+     raise_empty: bool = True,
+ ) -> ma.MaskedArray:
+     """Read grid window of EOProducts and merge into a 4D masked array."""
+     return ma.stack(
+         [
+             to_masked_array(s)
+             for s in generate_slice_dataarrays(
+                 products=products,
+                 assets=assets,
+                 eo_bands=eo_bands,
+                 grid=grid,
+                 resampling=resampling,
+                 nodatavals=nodatavals,
+                 merge_products_by=merge_products_by,
+                 merge_method=merge_method,
+                 sort=sort,
+                 product_read_kwargs=product_read_kwargs,
+                 raise_empty=raise_empty,
+             )
+         ]
+     )
+
+
+ def products_to_xarray(
+     products: List[EOProductProtocol],
+     assets: Optional[List[str]] = None,
+     eo_bands: Optional[List[str]] = None,
+     grid: Optional[GridProtocol] = None,
+     resampling: Resampling = Resampling.nearest,
+     nodatavals: NodataVals = None,
+     slice_axis_name: str = "time",
+     band_axis_name: str = "bands",
+     x_axis_name: str = "x",
+     y_axis_name: str = "y",
+     merge_products_by: Optional[str] = None,
+     merge_method: MergeMethod = MergeMethod.first,
+     sort: Optional[SortMethodConfig] = None,
+     raise_empty: bool = True,
+     product_read_kwargs: dict = {},
+ ) -> xr.Dataset:
+     """Read grid window of EOProducts and merge into a 4D xarray Dataset."""
+     data_vars = [
+         s
+         for s in generate_slice_dataarrays(
+             products=products,
+             assets=assets,
+             eo_bands=eo_bands,
+             grid=grid,
+             resampling=resampling,
+             nodatavals=nodatavals,
+             merge_products_by=merge_products_by,
+             merge_method=merge_method,
+             sort=sort,
+             product_read_kwargs=product_read_kwargs,
+             raise_empty=raise_empty,
+         )
+     ]
+     if merge_products_by and merge_products_by not in ["date", "datetime"]:
+         coords = {merge_products_by: [s.name for s in data_vars]}
+         slice_axis_name = merge_products_by
+     else:
+         coords = {
+             slice_axis_name: list(
+                 np.array(
+                     [product.item.datetime for product in products], dtype=np.datetime64
+                 )
+             )
+         }
+     return xr.Dataset(
+         data_vars={s.name: s for s in data_vars},
+         coords=coords,
+     ).transpose(slice_axis_name, band_axis_name, x_axis_name, y_axis_name)
+
+
+ class Slice:
+     """Combine multiple products into one slice."""
+
+     name: Any
+     properties: dict
+     products: Sequence[EOProductProtocol]
+     datetime: datetime
+
+     def __init__(
+         self,
+         name: Any,
+         products: Sequence[EOProductProtocol],
+     ):
+         self.name = name
+
+         # a Slice is only valid if it contains one or more products
+         if products:
+             self.products = products
+         else:  # pragma: no cover
+             raise ValueError("at least one product must be provided.")
+
+         # calculate mean datetime
+         timestamps = [
+             product.item.datetime.timestamp()
+             for product in self.products
+             if product.item.datetime
+         ]
+         mean_timestamp = sum(timestamps) / len(timestamps)
+         self.datetime = datetime.fromtimestamp(mean_timestamp)
+
+         # generate combined properties
+         self.properties = {}
+         for key in self.products[0].item.properties.keys():
+             try:
+                 self.properties[key] = self.get_property(key)
+             except ValueError:
+                 self.properties[key] = None
+
+     def __repr__(self) -> str:
+         return f"<Slice {self.name} ({len(self.products)} products)>"
+
+     @property
+     def __geo_interface__(self) -> Dict:
+         if self.products:
+             return mapping(
+                 unary_union([to_shape(product) for product in self.products])
+             )
+
+         raise EmptySliceException
+
+     @contextmanager
+     def cached(self) -> Generator[Slice, None, None]:
+         """Clear caches and run garbage collector when context manager is closed."""
+         yield self
+         with Timer() as tt:
+             self.clear_cached_data()
+             gc.collect()
+         logger.debug("Slice cache cleared and garbage collected in %s", tt)
+
+     def clear_cached_data(self):
+         logger.debug("clear caches of all products in slice")
+         for product in self.products:
+             product.clear_cached_data()
+
+     def get_property(self, property: str) -> Any:
+         """
+         Return merged property over all products.
+
+         If the property value is the same across all products, it is returned.
+         Otherwise a ValueError is raised.
+         """
+         # if the set of value hashes has a length of 1, all values are the same
+         values = [
+             get_hash(get_item_property(product.item, property=property))
+             for product in self.products
+         ]
+         if len(set(values)) == 1:
+             return get_item_property(self.products[0].item, property=property)
+
+         raise ValueError(
+             f"cannot get unique property {property} from products {self.products}"
+         )
+
+     def read(
+         self,
+         merge_method: MergeMethod = MergeMethod.first,
+         product_read_kwargs: dict = {},
+         raise_empty: bool = True,
+     ) -> ma.MaskedArray:
+         logger.debug("Slice: read from %s products", len(self.products))
+         return merge_products(
+             products=self.products,
+             merge_method=merge_method,
+             product_read_kwargs=product_read_kwargs,
+             raise_empty=raise_empty,
+         )
+
+
+ def products_to_slices(
+     products: List[EOProductProtocol],
+     group_by_property: Optional[str] = None,
+     sort: Optional[SortMethodConfig] = None,
+ ) -> List[Slice]:
+     """Group products by the given property into Slice objects and optionally sort the slices."""
+     if group_by_property:
+         grouped = defaultdict(list)
+         for product in products:
+             grouped[product.get_property(group_by_property)].append(product)
+         slices = [Slice(key, products) for key, products in grouped.items()]
+     else:
+         slices = [Slice(product.item.id, [product]) for product in products]
+
+     # only sort if slices is a non-empty list, otherwise sorting would raise an error
+     if sort and slices:
+         sort_dict = sort.model_dump()
+         func = sort_dict.pop("func")
+         slices = func(slices, **sort_dict)
+
+     return slices
+
+
+ def merge_products(
+     products: Sequence[EOProductProtocol],
+     merge_method: MergeMethod = MergeMethod.first,
+     product_read_kwargs: dict = {},
+     raise_empty: bool = True,
+ ) -> ma.MaskedArray:
+     """Merge given products into one array."""
+
+     def read_remaining_valid_products(
+         products_iter: Iterator[EOProductProtocol], product_read_kwargs: dict
+     ) -> Generator[ma.MaskedArray, None, None]:
+         """Read remaining products from iterator while discarding corrupted products."""
+         for product in products_iter:
+             try:
+                 yield product.read_np_array(**product_read_kwargs)
+             except (AssetKeyError, CorruptedProduct) as exc:
+                 logger.debug("skip product %s because of %s", product.item.id, exc)
+
+     if len(products) == 0:  # pragma: no cover
+         raise NoSourceProducts("no products to merge")
+
+     # deactivate raising the EmptyProductException; copy the kwargs so neither
+     # the caller's dict nor the mutable default argument gets mutated
+     product_read_kwargs = dict(product_read_kwargs, raise_empty=False)
+
+     products_iter = iter(products)
+
+     # read first valid product
+     for product in products_iter:
+         try:
+             out = product.read_np_array(**product_read_kwargs)
+             break
+         except (AssetKeyError, CorruptedProduct) as exc:
+             logger.debug("skip product %s because of %s", product.item.id, exc)
+     else:
+         # we cannot do anything here, as all products are broken
+         raise CorruptedSlice("all products are broken here")
+
+     # fill in gaps sequentially, product by product
+     if merge_method == MergeMethod.first:
+         for new in read_remaining_valid_products(products_iter, product_read_kwargs):
+             masked = out.mask
+             # update values at masked locations
+             out[masked] = new[masked]
+             # update mask at masked locations (i.e. unmask where new data is valid)
+             out.mask[masked] = new.mask[masked]
+             # if the whole output array is filled, there is no point in reading more data
+             if not out.mask.any():
+                 return out
+
+     # read all and average
+     elif merge_method == MergeMethod.average:
+
+         def _generate_arrays(
+             first_product_array: ma.MaskedArray,
+             remaining_product_arrays: Generator[ma.MaskedArray, None, None],
+         ) -> Generator[ma.MaskedArray, None, None]:
+             """Yield all available product arrays."""
+             yield first_product_array
+             yield from remaining_product_arrays
+
+         # explicitly specify dtype to avoid casting of integer arrays to floats
+         # during mean conversion:
+         # https://numpy.org/doc/stable/reference/generated/numpy.mean.html#numpy.mean
+         arrays = list(
+             _generate_arrays(
+                 out,
+                 read_remaining_valid_products(products_iter, product_read_kwargs),
+             )
+         )
+
+         # filter out arrays that are entirely masked
+         valid_arrays = [a for a in arrays if not ma.getmaskarray(a).all()]
+
+         if valid_arrays:
+             stacked = ma.stack(valid_arrays, dtype=out.dtype)
+             out = stacked.mean(axis=0, dtype=out.dtype)
+         else:
+             # all arrays were fully masked, so return a fully masked output
+             out = ma.masked_all(out.shape, dtype=out.dtype)
+
+     else:  # pragma: no cover
+         raise NotImplementedError(f"unknown merge method: {merge_method}")
+
+     if raise_empty and out.mask.all():
+         raise EmptySliceException(
+             f"slice is empty after combining {len(products)} products"
+         )
+
+     return out
+
+
+ def generate_slice_dataarrays(
+     products: List[EOProductProtocol],
+     assets: Optional[List[str]] = None,
+     eo_bands: Optional[List[str]] = None,
+     grid: Optional[GridProtocol] = None,
+     resampling: Resampling = Resampling.nearest,
+     nodatavals: NodataVals = None,
+     merge_products_by: Optional[str] = None,
+     merge_method: MergeMethod = MergeMethod.first,
+     sort: Optional[SortMethodConfig] = None,
+     product_read_kwargs: dict = {},
+     raise_empty: bool = True,
+ ) -> Iterator[xr.DataArray]:
+     """Yield products, merged into slices, as DataArrays."""
+     if len(products) == 0:
+         raise NoSourceProducts("no products to read")
+
+     stack_empty = True
+     assets = assets or []
+     eo_bands = eo_bands or []
+     variables = assets or eo_bands
+
+     # group products into slices and sort slices if configured
+     slices = products_to_slices(
+         products, group_by_property=merge_products_by, sort=sort
+     )
+
+     logger.debug(
+         "reading %s products in %s groups...",
+         len(products),
+         len(slices),
+     )
+     # pick a single nodata value for the output DataArrays
+     nodataval = nodatavals[0] if isinstance(nodatavals, list) else nodatavals
+     for slice in slices:
+         try:
+             # if merge_products_by is None, each slice contains just one product,
+             # so nothing has to be merged anyway
+             with slice.cached():
+                 yield to_dataarray(
+                     merge_products(
+                         products=slice.products,
+                         merge_method=merge_method,
+                         product_read_kwargs=dict(
+                             product_read_kwargs,
+                             assets=assets,
+                             eo_bands=eo_bands,
+                             grid=grid,
+                             resampling=resampling,
+                             nodatavals=nodatavals,
+                             raise_empty=raise_empty,
+                         ),
+                         raise_empty=raise_empty,
+                     ),
+                     nodataval=nodataval,
+                     name=slice.name,
+                     band_names=variables,
+                     attrs=slice.properties,
+                 )
+             # if at least one slice can be yielded, the stack is not empty
+             stack_empty = False
+         except (EmptySliceException, CorruptedSlice):
+             pass
+
+     if stack_empty:
+         raise EmptyStackException("all slices are empty")
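
For orientation, a minimal usage sketch of the reader above. The function and class names come from this module; the wrapper function, its arguments, and the asset names are hypothetical and assume the products expose "red"/"green"/"blue" assets and a grid object satisfying GridProtocol:

from typing import List

import xarray as xr
from rasterio.enums import Resampling

from mapchete_eo.io.products import products_to_xarray
from mapchete_eo.protocols import EOProductProtocol
from mapchete_eo.types import MergeMethod

def read_rgb_cube(products: List[EOProductProtocol], grid) -> xr.Dataset:
    """Read an RGB cube with one slice per datastrip (asset names assumed)."""
    return products_to_xarray(
        products=products,
        assets=["red", "green", "blue"],
        grid=grid,
        resampling=Resampling.nearest,
        # group all products sharing a datastrip into one slice ...
        merge_products_by="s2:datastrip_id",
        # ... and fill each slice gap-by-gap from the first valid product
        merge_method=MergeMethod.first,
    )

The returned Dataset is transposed to (slice, bands, x, y), with the slice axis named after the grouping property unless grouping by date/datetime.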
mapchete_eo/io/profiles.py
@@ -0,0 +1,45 @@
+ from rasterio.profiles import Profile
+
+
+ class COGDeflateProfile(Profile):
+     """Standard COG profile."""
+
+     defaults = {
+         "driver": "COG",
+         "tiled": True,
+         "blockxsize": 512,
+         "blockysize": 512,
+         "compress": "DEFLATE",
+     }
+
+
+ class JP2LossyProfile(Profile):
+     """Very lossy JP2 profile used for small test data."""
+
+     defaults = {
+         "driver": "JP2OpenJPEG",
+         "tiled": True,
+         "blockxsize": 512,
+         "blockysize": 512,
+         "quality": 50,
+     }
+
+
+ class JP2LosslessProfile(Profile):
+     """Lossless JP2 profile used for smaller data."""
+
+     defaults = {
+         "driver": "JP2OpenJPEG",
+         "tiled": True,
+         "blockxsize": 512,
+         "blockysize": 512,
+         "quality": 100,
+         "reversible": True,
+     }
+
+
+ rio_profiles = {
+     "cog_deflate": COGDeflateProfile(),
+     "jp2_lossy": JP2LossyProfile(),
+     "jp2_lossless": JP2LosslessProfile(),
+ }
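
These profiles subclass rasterio's Profile, so they behave like plain mappings of creation options. A sketch of feeding one into rasterio.open; the output path, array shape, and georeferencing below are made up for illustration:

import numpy as np
import rasterio
from rasterio.transform import from_origin

from mapchete_eo.io.profiles import rio_profiles

data = np.zeros((1, 512, 512), dtype="uint16")
with rasterio.open(
    "example.tif",  # hypothetical output path
    "w",
    width=512,
    height=512,
    count=1,
    dtype="uint16",
    crs="EPSG:3857",
    transform=from_origin(0, 0, 10.0, 10.0),
    **rio_profiles["cog_deflate"],  # unpacks driver, blocksize, compression
) as dst:
    dst.write(data)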
mapchete_eo/known_catalogs.py
@@ -0,0 +1,42 @@
+ """
+ Catalogs define access to a search interface which provides products
+ as pystac Items.
+ """
+
+ from typing import List
+
+ from mapchete_eo.search import STACSearchCatalog, UTMSearchCatalog
+
+
+ class EarthSearchV1S2L2A(STACSearchCatalog):
+     """Earth-Search catalog for Sentinel-2 Level 2A COGs."""
+
+     endpoint: str = "https://earth-search.aws.element84.com/v1/"
+
+
+ class CDSESearch(STACSearchCatalog):
+     """Copernicus Data Space Ecosystem (CDSE) STAC API."""
+
+     endpoint: str = "https://stac.dataspace.copernicus.eu/v1"
+
+
+ class PlanetaryComputerSearch(STACSearchCatalog):
+     """Planetary Computer Search."""
+
+     endpoint: str = "https://planetarycomputer.microsoft.com/api/stac/v1/"
+
+
+ class AWSSearchCatalogS2L2A(UTMSearchCatalog):
+     """
+     Not a search endpoint, but a static STAC collection with separately stored items.
+     A custom parser/browser is needed to find scenes based on date and UTM MGRS granule:
+
+     https://sentinel-s2-l2a-stac.s3.amazonaws.com/sentinel-s2-l2a.json
+     """
+
+     id: str = "sentinel-s2-l2a"
+     endpoint: str = "s3://sentinel-s2-l2a-stac/"
+     day_subdir_schema: str = "{year}/{month:02d}/{day:02d}"
+     stac_json_endswith: str = "T{tile_id}.json"
+     description: str = "Sentinel-2 L2A JPEG2000 archive on AWS."
+     stac_extensions: List[str] = []
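
The catalog classes are thin configuration shells: the endpoint is baked into the class, while search parameters are supplied at instantiation. A sketch, mirroring how the archives module below instantiates these classes and assuming that instantiation alone performs no search:

from mapchete_eo.known_catalogs import EarthSearchV1S2L2A

# the collection name matches the usage in mapchete_eo/platforms/sentinel2/archives.py
catalog = EarthSearchV1S2L2A(collections=["sentinel-2-l2a"])
print(catalog.endpoint)  # https://earth-search.aws.element84.com/v1/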
mapchete_eo/platforms/sentinel2/__init__.py
@@ -0,0 +1,17 @@
+ from mapchete_eo.platforms.sentinel2.driver import (
+     METADATA,
+     InputData,
+     Sentinel2Cube,
+     Sentinel2CubeGroup,
+ )
+ from mapchete_eo.platforms.sentinel2.metadata_parser import S2Metadata
+ from mapchete_eo.platforms.sentinel2.product import S2Product
+
+ __all__ = [
+     "S2Metadata",
+     "METADATA",
+     "InputData",
+     "Sentinel2Cube",
+     "Sentinel2CubeGroup",
+     "S2Product",
+ ]
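
Since the subpackage __init__ re-exports these names, both import styles resolve to the same objects:

from mapchete_eo.platforms.sentinel2 import S2Product
from mapchete_eo.platforms.sentinel2.product import S2Product as S2ProductDirect

assert S2Product is S2ProductDirect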
mapchete_eo/platforms/sentinel2/archives.py
@@ -0,0 +1,190 @@
+ from __future__ import annotations
+
+ from enum import Enum
+ from typing import Any, Type
+
+ from mapchete.path import MPath
+ from pydantic.functional_validators import BeforeValidator
+ from pystac import Item
+ from typing_extensions import Annotated
+
+ from mapchete_eo.archives.base import Archive
+ from mapchete_eo.io.items import item_fix_footprint
+ from mapchete_eo.known_catalogs import (
+     AWSSearchCatalogS2L2A,
+     CDSESearch,
+     EarthSearchV1S2L2A,
+ )
+ from mapchete_eo.platforms.sentinel2.types import ProcessingLevel
+ from mapchete_eo.search.s2_mgrs import S2Tile
+
+
+ def known_archive(v: Any, **kwargs) -> Type[Archive]:
+     if isinstance(v, str):
+         return KnownArchives[v].value
+     elif isinstance(v, type) and issubclass(v, Archive):
+         return v
+     else:
+         # raise ValueError so pydantic reports it as a validation error;
+         # pydantic's ValidationError cannot be raised directly from a validator
+         raise ValueError(f"cannot validate {v} to archive")
+
+
+ ArchiveClsFromString = Annotated[Type[Archive], BeforeValidator(known_archive)]
+
+
+ def add_datastrip_id(item: Item) -> Item:
+     """Make sure item metadata follows the standard."""
+     # change 'sentinel2' prefix to 's2'
+     properties = {k.replace("sentinel2:", "s2:"): v for k, v in item.properties.items()}
+
+     # add datastrip id as 's2:datastrip_id'
+     if not properties.get("s2:datastrip_id"):
+         from mapchete_eo.platforms.sentinel2 import S2Metadata
+
+         s2_metadata = S2Metadata.from_stac_item(item)
+         properties["s2:datastrip_id"] = s2_metadata.datastrip_id
+
+     item.properties = properties
+     return item
+
+
+ def map_cdse_paths_to_jp2_archive(item: Item) -> Item:
+     """
+     CDSE has the following assets:
+     AOT_10m, AOT_20m, AOT_60m, B01_20m, B01_60m, B02_10m, B02_20m, B02_60m, B03_10m, B03_20m,
+     B03_60m, B04_10m, B04_20m, B04_60m, B05_20m, B05_60m, B06_20m, B06_60m, B07_20m, B07_60m,
+     B08_10m, B09_60m, B11_20m, B11_60m, B12_20m, B12_60m, B8A_20m, B8A_60m, Product, SCL_20m,
+     SCL_60m, TCI_10m, TCI_20m, TCI_60m, WVP_10m, WVP_20m, WVP_60m, thumbnail, safe_manifest,
+     granule_metadata, inspire_metadata, product_metadata, datastrip_metadata
+
+     sample path for AWS JP2:
+     s3://sentinel-s2-l2a/tiles/51/K/XR/2020/7/31/0/R10m/
+     """
+     band_name_mapping = {
+         "AOT_10m": "aot",
+         "B01_20m": "coastal",
+         "B02_10m": "blue",
+         "B03_10m": "green",
+         "B04_10m": "red",
+         "B05_20m": "rededge1",
+         "B06_20m": "rededge2",
+         "B07_20m": "rededge3",
+         "B08_10m": "nir",
+         "B09_60m": "nir09",
+         "B11_20m": "swir16",
+         "B12_20m": "swir22",
+         "B8A_20m": "nir08",
+         "SCL_20m": "scl",
+         "TCI_10m": "visual",
+         "WVP_10m": "wvp",
+     }
+     path_base_scheme = "s3://sentinel-s2-l2a/tiles/{utm_zone}/{latitude_band}/{grid_square}/{year}/{month}/{day}/{count}"
+     s2tile = S2Tile.from_grid_code(item.properties["grid:code"])
+     if item.datetime is None:
+         raise ValueError(f"product {item.get_self_href()} does not have a timestamp")
+     product_basepath = MPath(
+         path_base_scheme.format(
+             utm_zone=s2tile.utm_zone,
+             latitude_band=s2tile.latitude_band,
+             grid_square=s2tile.grid_square,
+             year=item.datetime.year,
+             month=item.datetime.month,
+             day=item.datetime.day,
+             count=0,  # TODO: get count dynamically from metadata
+         )
+     )
+     new_assets = {}
+     for asset_name, asset in item.assets.items():
+         # ignore these assets
+         if asset_name in [
+             "Product",
+             "safe_manifest",
+             "product_metadata",
+             "inspire_metadata",
+             "datastrip_metadata",
+         ]:
+             continue
+         # set thumbnail
+         elif asset_name == "thumbnail":
+             asset.href = str(product_basepath / "R60m" / "TCI.jp2")
+         # point to proper metadata
+         elif asset_name == "granule_metadata":
+             asset.href = str(product_basepath / "metadata.xml")
+         # change band asset names and point to their new locations
+         elif asset_name in band_name_mapping:
+             name, resolution = asset_name.split("_")
+             asset.href = str(product_basepath / f"R{resolution}" / f"{name}.jp2")
+             asset_name = band_name_mapping[asset_name]
+         else:
+             continue
+         new_assets[asset_name] = asset
+
+     item.assets = new_assets
+
+     item.properties["s2:datastrip_id"] = item.properties.get("eopf:datastrip_id")
+     return item
+
+
+ class AWSL2ACOGv1(Archive):
+     """COG archive on AWS using the E84 STAC search endpoint."""
+
+     catalog = EarthSearchV1S2L2A(
+         collections=["sentinel-2-l2a"],
+     )
+     item_modifier_funcs = [
+         item_fix_footprint,
+     ]
+     processing_level = ProcessingLevel.level2a
+
+
+ class AWSL2AJP2(Archive):
+     """JPEG2000 archive on AWS using a dumb S3 path guesser."""
+
+     catalog = AWSSearchCatalogS2L2A(
+         collections=["sentinel-s2-l2a"],
+     )
+     item_modifier_funcs = [
+         item_fix_footprint,
+         add_datastrip_id,
+     ]
+     processing_level = ProcessingLevel.level2a
+
+
+ class AWSL2AJP2CSDE(Archive):
+     """JPEG2000 archive on AWS using the CDSE STAC search endpoint."""
+
+     catalog = CDSESearch(
+         collections=["sentinel-2-l2a"],
+     )
+     item_modifier_funcs = [
+         item_fix_footprint,
+         map_cdse_paths_to_jp2_archive,
+         add_datastrip_id,
+     ]
+     processing_level = ProcessingLevel.level2a
+
+
+ class CDSEL2AJP2CSDE(Archive):
+     """JPEG2000 archive on CDSE (EODATA S3) using the CDSE STAC search endpoint."""
+
+     catalog = CDSESearch(
+         collections=["sentinel-2-l2a"],
+     )
+     item_modifier_funcs = [
+         item_fix_footprint,
+         add_datastrip_id,
+     ]
+     processing_level = ProcessingLevel.level2a
+
+
+ class KnownArchives(Enum):
+     S2AWS_COG = AWSL2ACOGv1
+     S2AWS_JP2 = AWSL2AJP2
+     S2CDSE_AWSJP2 = AWSL2AJP2CSDE
+     S2CDSE_JP2 = CDSEL2AJP2CSDE
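
A sketch of how the registry and the validator above fit together: strings are resolved through KnownArchives, while Archive subclasses pass through unchanged.

from mapchete_eo.platforms.sentinel2.archives import (
    AWSL2ACOGv1,
    KnownArchives,
    known_archive,
)

# resolve by registry name
assert known_archive("S2AWS_COG") is AWSL2ACOGv1
# an Archive subclass passes through as-is
assert known_archive(AWSL2ACOGv1) is AWSL2ACOGv1
# direct enum access
assert KnownArchives.S2AWS_COG.value is AWSL2ACOGv1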