mapchete-eo 2025.10.1__py2.py3-none-any.whl → 2026.1.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. mapchete_eo/__init__.py +1 -1
  2. mapchete_eo/base.py +94 -54
  3. mapchete_eo/cli/options_arguments.py +11 -27
  4. mapchete_eo/cli/s2_brdf.py +1 -1
  5. mapchete_eo/cli/s2_cat_results.py +4 -20
  6. mapchete_eo/cli/s2_find_broken_products.py +4 -20
  7. mapchete_eo/cli/s2_jp2_static_catalog.py +2 -2
  8. mapchete_eo/cli/static_catalog.py +4 -45
  9. mapchete_eo/eostac.py +1 -1
  10. mapchete_eo/io/assets.py +20 -16
  11. mapchete_eo/io/items.py +36 -23
  12. mapchete_eo/io/path.py +19 -8
  13. mapchete_eo/io/products.py +22 -24
  14. mapchete_eo/platforms/sentinel2/__init__.py +1 -1
  15. mapchete_eo/platforms/sentinel2/_mapper_registry.py +89 -0
  16. mapchete_eo/platforms/sentinel2/brdf/correction.py +1 -1
  17. mapchete_eo/platforms/sentinel2/brdf/hls.py +1 -1
  18. mapchete_eo/platforms/sentinel2/brdf/models.py +1 -1
  19. mapchete_eo/platforms/sentinel2/brdf/protocols.py +1 -1
  20. mapchete_eo/platforms/sentinel2/brdf/ross_thick.py +1 -1
  21. mapchete_eo/platforms/sentinel2/brdf/sun_angle_arrays.py +1 -1
  22. mapchete_eo/platforms/sentinel2/config.py +73 -13
  23. mapchete_eo/platforms/sentinel2/driver.py +0 -39
  24. mapchete_eo/platforms/sentinel2/metadata_parser/__init__.py +6 -0
  25. mapchete_eo/platforms/sentinel2/{path_mappers → metadata_parser}/base.py +1 -1
  26. mapchete_eo/platforms/sentinel2/{path_mappers/metadata_xml.py → metadata_parser/default_path_mapper.py} +2 -2
  27. mapchete_eo/platforms/sentinel2/metadata_parser/models.py +78 -0
  28. mapchete_eo/platforms/sentinel2/{metadata_parser.py → metadata_parser/s2metadata.py} +51 -144
  29. mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py +57 -0
  30. mapchete_eo/platforms/sentinel2/preconfigured_sources/guessers.py +108 -0
  31. mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py +171 -0
  32. mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py +217 -0
  33. mapchete_eo/platforms/sentinel2/preprocessing_tasks.py +22 -1
  34. mapchete_eo/platforms/sentinel2/processing_baseline.py +3 -0
  35. mapchete_eo/platforms/sentinel2/product.py +83 -18
  36. mapchete_eo/platforms/sentinel2/source.py +114 -0
  37. mapchete_eo/platforms/sentinel2/types.py +5 -0
  38. mapchete_eo/product.py +14 -8
  39. mapchete_eo/protocols.py +5 -0
  40. mapchete_eo/search/__init__.py +3 -3
  41. mapchete_eo/search/base.py +127 -99
  42. mapchete_eo/search/config.py +75 -4
  43. mapchete_eo/search/s2_mgrs.py +8 -9
  44. mapchete_eo/search/stac_search.py +99 -97
  45. mapchete_eo/search/stac_static.py +46 -102
  46. mapchete_eo/search/utm_search.py +54 -62
  47. mapchete_eo/settings.py +1 -0
  48. mapchete_eo/sort.py +4 -6
  49. mapchete_eo/source.py +107 -0
  50. {mapchete_eo-2025.10.1.dist-info → mapchete_eo-2026.1.0.dist-info}/METADATA +4 -3
  51. mapchete_eo-2026.1.0.dist-info/RECORD +89 -0
  52. {mapchete_eo-2025.10.1.dist-info → mapchete_eo-2026.1.0.dist-info}/WHEEL +1 -1
  53. {mapchete_eo-2025.10.1.dist-info → mapchete_eo-2026.1.0.dist-info}/entry_points.txt +1 -1
  54. {mapchete_eo-2025.10.1.dist-info → mapchete_eo-2026.1.0.dist-info}/licenses/LICENSE +1 -1
  55. mapchete_eo/archives/__init__.py +0 -0
  56. mapchete_eo/archives/base.py +0 -65
  57. mapchete_eo/geometry.py +0 -271
  58. mapchete_eo/known_catalogs.py +0 -42
  59. mapchete_eo/platforms/sentinel2/archives.py +0 -190
  60. mapchete_eo/platforms/sentinel2/path_mappers/__init__.py +0 -29
  61. mapchete_eo/platforms/sentinel2/path_mappers/earthsearch.py +0 -34
  62. mapchete_eo/platforms/sentinel2/path_mappers/sinergise.py +0 -105
  63. mapchete_eo-2025.10.1.dist-info/RECORD +0 -88
mapchete_eo/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "2025.10.1"
1
+ __version__ = "2026.1.0"
mapchete_eo/base.py CHANGED
@@ -1,8 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import warnings
3
4
  import logging
4
5
  from functools import cached_property
5
- from typing import Any, Callable, List, Optional, Type, Union
6
+ from typing import Any, Callable, List, Optional, Sequence, Type, Union, Dict, Generator
6
7
 
7
8
  import croniter
8
9
  from mapchete import Bounds
@@ -17,13 +18,13 @@ from mapchete.io.vector import IndexedFeatures
17
18
  from mapchete.path import MPath
18
19
  from mapchete.tile import BufferedTile
19
20
  from mapchete.types import MPathLike, NodataVal, NodataVals
20
- from pydantic import BaseModel
21
+ from pydantic import BaseModel, model_validator
22
+ from pystac import Item
21
23
  from rasterio.enums import Resampling
22
24
  from rasterio.features import geometry_mask
23
25
  from shapely.geometry import mapping
24
26
  from shapely.geometry.base import BaseGeometry
25
27
 
26
- from mapchete_eo.archives.base import Archive
27
28
  from mapchete_eo.exceptions import CorruptedProductMetadata, PreprocessingNotFinished
28
29
  from mapchete_eo.io import (
29
30
  products_to_np_array,
@@ -31,9 +32,9 @@ from mapchete_eo.io import (
31
32
  read_levelled_cube_to_np_array,
32
33
  read_levelled_cube_to_xarray,
33
34
  )
35
+ from mapchete_eo.source import Source
34
36
  from mapchete_eo.product import EOProduct
35
37
  from mapchete_eo.protocols import EOProductProtocol
36
- from mapchete_eo.search.stac_static import STACStaticCatalog
37
38
  from mapchete_eo.settings import mapchete_eo_settings
38
39
  from mapchete_eo.sort import SortMethodConfig, TargetDateSort
39
40
  from mapchete_eo.time import to_datetime
@@ -44,13 +45,39 @@ logger = logging.getLogger(__name__)
44
45
 
45
46
  class BaseDriverConfig(BaseModel):
46
47
  format: str
47
- time: Union[TimeRange, List[TimeRange]]
48
+ source: Sequence[Source]
49
+ time: Optional[Union[TimeRange, List[TimeRange]]] = None
48
50
  cat_baseurl: Optional[str] = None
49
51
  cache: Optional[Any] = None
50
52
  footprint_buffer: float = 0
51
53
  area: Optional[Union[MPathLike, dict, type[BaseGeometry]]] = None
52
54
  preprocessing_tasks: bool = False
53
- archive: Optional[Type[Archive]] = None
55
+ search_kwargs: Optional[Dict[str, Any]] = None
56
+
57
+ @model_validator(mode="before")
58
+ def to_list(cls, values: Dict[str, Any]) -> Dict[str, Any]:
59
+ """Expands source to list."""
60
+ for field in ["source"]:
61
+ value = values.get(field)
62
+ if value is not None and not isinstance(value, list):
63
+ values[field] = [value]
64
+ return values
65
+
66
+ @model_validator(mode="before")
67
+ def deprecate_cat_baseurl(cls, values: Dict[str, Any]) -> Dict[str, Any]:
68
+ cat_baseurl = values.get("cat_baseurl")
69
+ if cat_baseurl: # pragma: no cover
70
+ warnings.warn(
71
+ "'cat_baseurl' will be deprecated soon. Please use 'catalog_type=static' in the source.",
72
+ category=DeprecationWarning,
73
+ stacklevel=2,
74
+ )
75
+ if values.get("source", []):
76
+ raise ValueError(
77
+ "deprecated cat_baseurl field found alongside sources."
78
+ )
79
+ values["source"] = [dict(collection=cat_baseurl, catalog_type="static")]
80
+ return values
54
81
 
55
82
 
56
83
  class EODataCube(base.InputTile):
@@ -63,7 +90,7 @@ class EODataCube(base.InputTile):
63
90
 
64
91
  tile: BufferedTile
65
92
  eo_bands: dict
66
- time: List[TimeRange]
93
+ time: Optional[List[TimeRange]]
67
94
  area: BaseGeometry
68
95
  area_pixelbuffer: int = 0
69
96
 
@@ -72,7 +99,7 @@ class EODataCube(base.InputTile):
72
99
  tile: BufferedTile,
73
100
  products: Optional[List[EOProductProtocol]],
74
101
  eo_bands: dict,
75
- time: List[TimeRange],
102
+ time: Optional[List[TimeRange]] = None,
76
103
  input_key: Optional[str] = None,
77
104
  area: Optional[BaseGeometry] = None,
78
105
  **kwargs,
@@ -314,27 +341,25 @@ class EODataCube(base.InputTile):
314
341
  """
315
342
  Return a filtered list of input products.
316
343
  """
317
- if any([start_time, end_time, timestamps]):
344
+ if any([start_time, end_time, timestamps]): # pragma: no cover
318
345
  raise NotImplementedError("time subsets are not yet implemented")
319
346
 
320
347
  if time_pattern:
321
348
  # filter products by time pattern
322
- tz = tzutc()
323
- coord_time = [
324
- t.replace(tzinfo=tz)
325
- for t in croniter.croniter_range(
326
- to_datetime(self.start_time),
327
- to_datetime(self.end_time),
328
- time_pattern,
329
- )
330
- ]
331
349
  return [
332
350
  product
333
351
  for product in self.products
334
- if product.item.datetime in coord_time
352
+ if product.item.datetime
353
+ in [
354
+ t.replace(tzinfo=tzutc())
355
+ for t in croniter.croniter_range(
356
+ to_datetime(self.start_time),
357
+ to_datetime(self.end_time),
358
+ time_pattern,
359
+ )
360
+ ]
335
361
  ]
336
- else:
337
- return self.products
362
+ return self.products
338
363
 
339
364
  def is_empty(self) -> bool: # pragma: no cover
340
365
  """
@@ -358,16 +383,16 @@ class EODataCube(base.InputTile):
358
383
  nodatavals = self.default_read_nodataval
359
384
  merge_products_by = merge_products_by or self.default_read_merge_products_by
360
385
  merge_method = merge_method or self.default_read_merge_method
361
- if resampling is None:
362
- resampling = self.default_read_resampling
363
- else:
364
- resampling = (
365
- resampling
366
- if isinstance(resampling, Resampling)
367
- else Resampling[resampling]
368
- )
369
386
  return dict(
370
- resampling=resampling,
387
+ resampling=(
388
+ self.default_read_resampling
389
+ if resampling is None
390
+ else (
391
+ resampling
392
+ if isinstance(resampling, Resampling)
393
+ else Resampling[resampling]
394
+ )
395
+ ),
371
396
  nodatavals=nodatavals,
372
397
  merge_products_by=merge_products_by,
373
398
  merge_method=merge_method,
@@ -401,8 +426,7 @@ class InputData(base.InputData):
401
426
  default_preprocessing_task: Callable = staticmethod(EOProduct.from_stac_item)
402
427
  driver_config_model: Type[BaseDriverConfig] = BaseDriverConfig
403
428
  params: BaseDriverConfig
404
- archive: Archive
405
- time: Union[TimeRange, List[TimeRange]]
429
+ time: Optional[Union[TimeRange, List[TimeRange]]]
406
430
  area: BaseGeometry
407
431
  _products: Optional[IndexedFeatures] = None
408
432
 
@@ -421,6 +445,8 @@ class InputData(base.InputData):
421
445
  self.standalone = standalone
422
446
 
423
447
  self.params = self.driver_config_model(**input_params["abstract"])
448
+ self.conf_dir = input_params.get("conf_dir")
449
+
424
450
  # we have to make sure, the cache path is absolute
425
451
  # not quite fond of this solution
426
452
  if self.params.cache:
@@ -429,14 +455,18 @@ class InputData(base.InputData):
429
455
  ).absolute_path(base_dir=input_params.get("conf_dir"))
430
456
  self.area = self._init_area(input_params)
431
457
  self.time = self.params.time
432
- if self.readonly: # pragma: no cover
433
- return
434
458
 
435
- self.set_archive(base_dir=input_params["conf_dir"])
459
+ self.eo_bands = [
460
+ eo_band
461
+ for source in self.params.source
462
+ for eo_band in source.eo_bands(base_dir=self.conf_dir)
463
+ ]
436
464
 
465
+ if self.readonly: # pragma: no cover
466
+ return
437
467
  # don't use preprocessing tasks for Sentinel-2 products:
438
468
  if self.params.preprocessing_tasks or self.params.cache is not None:
439
- for item in self.archive.items():
469
+ for item in self.source_items():
440
470
  self.add_preprocessing_task(
441
471
  self.default_preprocessing_task,
442
472
  fargs=(item,),
@@ -455,7 +485,7 @@ class InputData(base.InputData):
455
485
  self.default_preprocessing_task(
456
486
  item, cache_config=self.params.cache, cache_all=True
457
487
  )
458
- for item in self.archive.items()
488
+ for item in self.source_items()
459
489
  ]
460
490
  )
461
491
 
@@ -467,7 +497,7 @@ class InputData(base.InputData):
467
497
  configured_area, configured_area_crs = guess_geometry(
468
498
  self.params.area,
469
499
  bounds=Bounds.from_inp(
470
- input_params.get("delimiters", {}).get("bounds"),
500
+ input_params.get("delimiters", {}).get("effective_bounds"),
471
501
  crs=getattr(input_params.get("pyramid"), "crs"),
472
502
  ),
473
503
  raise_if_empty=False,
@@ -481,20 +511,30 @@ class InputData(base.InputData):
481
511
  )
482
512
  return process_area
483
513
 
484
- def set_archive(self, base_dir: MPath):
485
- # this only works with some static archive:
486
- if self.params.cat_baseurl:
487
- self.archive = Archive(
488
- catalog=STACStaticCatalog(
489
- baseurl=MPath(self.params.cat_baseurl).absolute_path(
490
- base_dir=base_dir
491
- ),
492
- ),
493
- area=self.bbox(mapchete_eo_settings.default_catalog_crs),
494
- time=self.time,
514
+ def source_items(self) -> Generator[Item, None, None]:
515
+ already_returned = set()
516
+ for source in self.params.source:
517
+ area = reproject_geometry(
518
+ self.area,
519
+ src_crs=self.crs,
520
+ dst_crs=source.catalog_crs,
495
521
  )
496
- else:
497
- raise NotImplementedError()
522
+ if area.is_empty:
523
+ continue
524
+ for item in source.search(
525
+ time=self.time,
526
+ area=area,
527
+ base_dir=self.conf_dir,
528
+ ):
529
+ # if item was already found in previous source, skip
530
+ if item.id in already_returned:
531
+ continue
532
+
533
+ # if item is new, add to list and yield
534
+ already_returned.add(item.id)
535
+ item.properties["mapchete_eo:source"] = source
536
+ yield item
537
+ logger.debug("returned set of %s items", len(already_returned))
498
538
 
499
539
  def bbox(self, out_crs: Optional[str] = None) -> BaseGeometry:
500
540
  """Return data bounding box."""
@@ -517,7 +557,7 @@ class InputData(base.InputData):
517
557
  return self._products
518
558
 
519
559
  # TODO: copied it from mapchete_satellite, not yet sure which use case this is
520
- elif self.standalone:
560
+ elif self.standalone: # pragma: no cover
521
561
  raise NotImplementedError()
522
562
 
523
563
  # if preprocessing tasks are ready, index them for further use
@@ -525,7 +565,7 @@ class InputData(base.InputData):
525
565
  return IndexedFeatures(
526
566
  [
527
567
  self.get_preprocessing_task_result(item.id)
528
- for item in self.archive.items()
568
+ for item in self.source_items()
529
569
  if not isinstance(item, CorruptedProductMetadata)
530
570
  ],
531
571
  crs=self.crs,
@@ -557,7 +597,7 @@ class InputData(base.InputData):
557
597
  return self.input_tile_cls(
558
598
  tile,
559
599
  products=tile_products,
560
- eo_bands=self.archive.catalog.eo_bands,
600
+ eo_bands=self.eo_bands,
561
601
  time=self.time,
562
602
  # passing on the input key is essential so dependent preprocessing tasks can be found!
563
603
  input_key=self.input_key,
@@ -6,8 +6,8 @@ from mapchete.path import MPath
6
6
 
7
7
  from mapchete_eo.platforms.sentinel2.brdf.models import BRDFModels
8
8
  from mapchete_eo.io.profiles import rio_profiles
9
- from mapchete_eo.platforms.sentinel2.archives import KnownArchives
10
9
  from mapchete_eo.platforms.sentinel2.config import SceneClassification
10
+ from mapchete_eo.platforms.sentinel2.source import Sentinel2Source
11
11
  from mapchete_eo.platforms.sentinel2.types import L2ABand, Resolution
12
12
  from mapchete_eo.time import to_datetime
13
13
 
@@ -58,16 +58,15 @@ def _str_to_l2a_bands(_, __, value):
58
58
  return [L2ABand[v] for v in value.split(",")]
59
59
 
60
60
 
61
- def _archive_name_to_archive_cls(_, __, value):
61
+ def _str_to_datetime(_, param, value):
62
62
  if value:
63
- return KnownArchives[value]
63
+ return to_datetime(value)
64
+ raise ValueError(f"--{param.name} is mandatory")
64
65
 
65
66
 
66
- def _str_to_datetime(_, param, value):
67
+ def _str_to_source(_, __, value):
67
68
  if value:
68
- return to_datetime(value)
69
- else:
70
- raise ValueError(f"--{param.name} is mandatory")
69
+ return Sentinel2Source(collection=value)
71
70
 
72
71
 
73
72
  arg_stac_item = click.argument("stac-item", type=click.Path(path_type=MPath))
@@ -167,27 +166,12 @@ opt_start_time = click.option(
167
166
  opt_end_time = click.option(
168
167
  "--end-time", type=click.STRING, callback=_str_to_datetime, help="End time"
169
168
  )
170
- opt_archive = click.option(
171
- "--archive",
172
- type=click.Choice([archive.name for archive in KnownArchives]),
173
- default="S2AWS_COG",
174
- help="Archive to read from.",
175
- callback=_archive_name_to_archive_cls,
176
- )
177
- opt_collection = click.option(
178
- "--collection",
169
+ opt_source = click.option(
170
+ "--source",
179
171
  type=click.STRING,
180
- help="Data collection to be queried.",
181
- )
182
- opt_endpoint = click.option(
183
- "--endpoint",
184
- type=click.STRING,
185
- help="Search endpoint.",
186
- )
187
- opt_catalog_json = click.option(
188
- "--catalog-json",
189
- type=click.Path(path_type=MPath),
190
- help="JSON file for a static catalog.",
172
+ default="EarthSearch",
173
+ callback=_str_to_source,
174
+ help="Data source to be queried.",
191
175
  )
192
176
  opt_name = click.option("--name", type=click.STRING, help="Static catalog name.")
193
177
  opt_description = click.option(
@@ -11,7 +11,7 @@ from mapchete_eo.io.profiles import COGDeflateProfile
11
11
  from mapchete_eo.platforms.sentinel2.brdf.config import BRDFModels
12
12
  from mapchete_eo.platforms.sentinel2.config import BRDFConfig
13
13
  from mapchete_eo.platforms.sentinel2.product import S2Product
14
- from mapchete_eo.platforms.sentinel2.metadata_parser import Resolution
14
+ from mapchete_eo.platforms.sentinel2.metadata_parser.s2metadata import Resolution
15
15
  from mapchete_eo.platforms.sentinel2.types import L2ABand
16
16
 
17
17
 
@@ -12,10 +12,9 @@ from mapchete.path import MPath
12
12
  from mapchete.types import Bounds
13
13
 
14
14
  from mapchete_eo.cli import options_arguments
15
- from mapchete_eo.cli.static_catalog import get_catalog
16
15
  from mapchete_eo.io.products import Slice, products_to_slices
17
- from mapchete_eo.platforms.sentinel2.archives import KnownArchives
18
16
  from mapchete_eo.platforms.sentinel2.product import S2Product
17
+ from mapchete_eo.platforms.sentinel2.source import Sentinel2Source
19
18
  from mapchete_eo.sort import TargetDateSort
20
19
  from mapchete_eo.types import TimeRange
21
20
 
@@ -26,10 +25,7 @@ from mapchete_eo.types import TimeRange
26
25
  @options_arguments.opt_end_time
27
26
  @opt_bounds
28
27
  @options_arguments.opt_mgrs_tile
29
- @options_arguments.opt_archive
30
- @options_arguments.opt_collection
31
- @options_arguments.opt_endpoint
32
- @options_arguments.opt_catalog_json
28
+ @options_arguments.opt_source
33
29
  @click.option(
34
30
  "--format",
35
31
  type=click.Choice(["FlatGeobuf", "GeoJSON"]),
@@ -46,32 +42,20 @@ def s2_cat_results(
46
42
  end_time: datetime,
47
43
  bounds: Optional[Bounds] = None,
48
44
  mgrs_tile: Optional[str] = None,
49
- archive: Optional[KnownArchives] = None,
50
- collection: Optional[str] = None,
51
- endpoint: Optional[str] = None,
52
- catalog_json: Optional[MPath] = None,
45
+ source: Sentinel2Source = Sentinel2Source(collection="EarthSearch"),
53
46
  format: Literal["FlatGeobuf", "GeoJSON"] = "FlatGeobuf",
54
47
  by_slices: bool = False,
55
48
  add_index: bool = False,
56
49
  debug: bool = False,
57
50
  ):
58
51
  """Write a search result."""
59
- if catalog_json and endpoint: # pragma: no cover
60
- raise click.ClickException(
61
- "exactly one of --archive, --catalog-json or --endpoint has to be set."
62
- )
63
52
  if any([start_time is None, end_time is None]): # pragma: no cover
64
53
  raise click.ClickException("--start-time and --end-time are mandatory")
65
54
  if all([bounds is None, mgrs_tile is None]): # pragma: no cover
66
55
  raise click.ClickException("--bounds or --mgrs-tile are required")
67
56
  slice_property_key = "s2:datastrip_id"
68
57
  with click_spinner.Spinner(disable=debug):
69
- catalog = get_catalog(
70
- catalog_json=catalog_json,
71
- endpoint=endpoint,
72
- known_archive=archive,
73
- collection=collection,
74
- )
58
+ catalog = source.get_catalog()
75
59
  slices = products_to_slices(
76
60
  [
77
61
  S2Product.from_stac_item(item)
@@ -9,8 +9,7 @@ from tqdm import tqdm
9
9
 
10
10
  from mapchete_eo.cli import options_arguments
11
11
  from mapchete_eo.cli.s2_verify import verify_item
12
- from mapchete_eo.cli.static_catalog import get_catalog
13
- from mapchete_eo.platforms.sentinel2.archives import KnownArchives
12
+ from mapchete_eo.platforms.sentinel2.source import Sentinel2Source
14
13
  from mapchete_eo.product import add_to_blacklist, blacklist_products
15
14
  from mapchete_eo.types import TimeRange
16
15
 
@@ -19,10 +18,7 @@ from mapchete_eo.types import TimeRange
19
18
  @opt_bounds
20
19
  @options_arguments.opt_start_time
21
20
  @options_arguments.opt_end_time
22
- @options_arguments.opt_archive
23
- @options_arguments.opt_collection
24
- @options_arguments.opt_endpoint
25
- @options_arguments.opt_catalog_json
21
+ @options_arguments.opt_source
26
22
  @options_arguments.opt_assets
27
23
  @options_arguments.opt_blacklist
28
24
  @options_arguments.opt_thumbnail_dir
@@ -32,10 +28,7 @@ def s2_find_broken_products(
32
28
  end_time: datetime,
33
29
  bounds: Optional[Bounds] = None,
34
30
  mgrs_tile: Optional[str] = None,
35
- archive: Optional[KnownArchives] = None,
36
- collection: Optional[str] = None,
37
- endpoint: Optional[str] = None,
38
- catalog_json: Optional[MPath] = None,
31
+ source: Sentinel2Source = Sentinel2Source(collection="EarthSearch"),
39
32
  assets: List[str] = [],
40
33
  asset_exists_check: bool = True,
41
34
  blacklist: MPath = MPath("s3://eox-mhub-cache/blacklist.txt"),
@@ -43,20 +36,11 @@ def s2_find_broken_products(
43
36
  **__,
44
37
  ):
45
38
  """Find broken Sentinel-2 products."""
46
- if catalog_json and endpoint: # pragma: no cover
47
- raise click.ClickException(
48
- "exactly one of --archive, --catalog-json or --endpoint has to be set."
49
- )
50
39
  if any([start_time is None, end_time is None]): # pragma: no cover
51
40
  raise click.ClickException("--start-time and --end-time are mandatory")
52
41
  if all([bounds is None, mgrs_tile is None]): # pragma: no cover
53
42
  raise click.ClickException("--bounds or --mgrs-tile are required")
54
- catalog = get_catalog(
55
- catalog_json=catalog_json,
56
- endpoint=endpoint,
57
- known_archive=archive,
58
- collection=collection,
59
- )
43
+ catalog = source.get_catalog()
60
44
  blacklisted_products = blacklist_products(blacklist)
61
45
  for item in tqdm(
62
46
  catalog.search(
@@ -19,7 +19,7 @@ from shapely import prepare
19
19
 
20
20
  from mapchete_eo.cli import options_arguments
21
21
  from mapchete_eo.io.items import item_fix_footprint
22
- from mapchete_eo.search.s2_mgrs import InvalidMGRSSquare, S2Tile, bounds_to_geom
22
+ from mapchete_eo.search.s2_mgrs import InvalidMGRSSquare, S2Tile
23
23
  from mapchete_eo.time import day_range
24
24
 
25
25
  logger = logging.getLogger(__name__)
@@ -106,7 +106,7 @@ def s2_jp2_static_catalog(
106
106
  - each S2Tile file contains for each STAC item one entry with geometry and href
107
107
  """
108
108
  bounds = bounds or Bounds(-180, -90, 180, 90)
109
- aoi = bounds_to_geom(bounds)
109
+ aoi = bounds.latlon_geometry()
110
110
  prepare(aoi)
111
111
  items_per_tile = defaultdict(list)
112
112
  for day in day_range(start_date=start_time, end_date=end_time):
@@ -9,10 +9,8 @@ from rasterio.profiles import Profile
9
9
 
10
10
  from mapchete_eo.cli import options_arguments
11
11
  from mapchete_eo.platforms.sentinel2 import S2Metadata
12
- from mapchete_eo.platforms.sentinel2.archives import KnownArchives
12
+ from mapchete_eo.platforms.sentinel2.source import Sentinel2Source
13
13
  from mapchete_eo.platforms.sentinel2.types import Resolution
14
- from mapchete_eo.search import STACSearchCatalog, STACStaticCatalog
15
- from mapchete_eo.search.base import CatalogSearcher
16
14
  from mapchete_eo.types import TimeRange
17
15
 
18
16
 
@@ -22,10 +20,7 @@ from mapchete_eo.types import TimeRange
22
20
  @options_arguments.opt_mgrs_tile
23
21
  @options_arguments.opt_start_time
24
22
  @options_arguments.opt_end_time
25
- @options_arguments.opt_archive
26
- @options_arguments.opt_collection
27
- @options_arguments.opt_endpoint
28
- @options_arguments.opt_catalog_json
23
+ @options_arguments.opt_source
29
24
  @options_arguments.opt_name
30
25
  @options_arguments.opt_description
31
26
  @options_arguments.opt_assets
@@ -40,10 +35,7 @@ def static_catalog(
40
35
  end_time: datetime,
41
36
  bounds: Optional[Bounds] = None,
42
37
  mgrs_tile: Optional[str] = None,
43
- archive: Optional[KnownArchives] = None,
44
- collection: Optional[str] = None,
45
- endpoint: Optional[str] = None,
46
- catalog_json: Optional[MPath] = None,
38
+ source: Sentinel2Source = Sentinel2Source(collection="EarthSearch"),
47
39
  name: Optional[str] = None,
48
40
  description: Optional[str] = None,
49
41
  assets: Optional[List[str]] = None,
@@ -54,20 +46,11 @@ def static_catalog(
54
46
  **__,
55
47
  ):
56
48
  """Write a static STAC catalog for selected area."""
57
- if catalog_json and endpoint: # pragma: no cover
58
- raise click.ClickException(
59
- "exactly one of --archive, --catalog-json or --endpoint has to be set."
60
- )
61
49
  if any([start_time is None, end_time is None]): # pragma: no cover
62
50
  raise click.ClickException("--start-time and --end-time are mandatory")
63
51
  if all([bounds is None, mgrs_tile is None]): # pragma: no cover
64
52
  raise click.ClickException("--bounds or --mgrs-tile are required")
65
- catalog = get_catalog(
66
- catalog_json=catalog_json,
67
- endpoint=endpoint,
68
- known_archive=archive,
69
- collection=collection,
70
- )
53
+ catalog = source.get_catalog()
71
54
  if hasattr(catalog, "write_static_catalog"):
72
55
  with options_arguments.TqdmUpTo(
73
56
  unit="products", unit_scale=True, miniters=1, disable=opt_debug
@@ -97,27 +80,3 @@ def static_catalog(
97
80
  raise AttributeError(
98
81
  f"catalog {catalog} does not support writing a static version"
99
82
  )
100
-
101
-
102
- def get_catalog(
103
- catalog_json: Optional[MPath],
104
- endpoint: Optional[MPath],
105
- known_archive: Optional[KnownArchives] = None,
106
- collection: Optional[str] = None,
107
- ) -> CatalogSearcher:
108
- if catalog_json:
109
- return STACStaticCatalog(
110
- baseurl=catalog_json,
111
- )
112
- elif endpoint:
113
- if collection:
114
- return STACSearchCatalog(
115
- endpoint=endpoint,
116
- collections=[collection],
117
- )
118
- else:
119
- raise ValueError("collection must be provided")
120
- elif known_archive:
121
- return known_archive.value.catalog
122
- else:
123
- raise TypeError("cannot determine catalog")
mapchete_eo/eostac.py CHANGED
@@ -5,7 +5,7 @@ Driver class for EOSTAC static STAC catalogs.
5
5
  from mapchete_eo import base
6
6
 
7
7
  METADATA: dict = {
8
- "driver_name": "EOSTAC_DEV",
8
+ "driver_name": "EOSTAC",
9
9
  "data_type": None,
10
10
  "mode": "r",
11
11
  "file_extensions": [],
mapchete_eo/io/assets.py CHANGED
@@ -32,7 +32,7 @@ logger = logging.getLogger(__name__)
32
32
 
33
33
 
34
34
  class STACRasterBandProperties(BaseModel):
35
- nodata: NodataVal = None
35
+ nodata: Optional[NodataVal] = None
36
36
  data_type: Optional[str] = None
37
37
  scale: float = 1.0
38
38
  offset: float = 0.0
@@ -40,9 +40,9 @@ class STACRasterBandProperties(BaseModel):
40
40
  @staticmethod
41
41
  def from_asset(
42
42
  asset: pystac.Asset,
43
- nodataval: NodataVal = None,
43
+ nodataval: Optional[NodataVal] = None,
44
44
  ) -> STACRasterBandProperties:
45
- if asset.extra_fields.get("raster:offset") is not None:
45
+ if asset.extra_fields.get("raster:offset", {}):
46
46
  properties = dict(
47
47
  offset=asset.extra_fields.get("raster:offset"),
48
48
  scale=asset.extra_fields.get("raster:scale"),
@@ -82,41 +82,45 @@ def asset_to_np_array(
82
82
  path = asset_mpath(item, asset)
83
83
 
84
84
  # find out asset details if raster:bands is available
85
- stac_raster_bands = STACRasterBandProperties.from_asset(
85
+ band_properties = STACRasterBandProperties.from_asset(
86
86
  item.assets[asset], nodataval=nodataval
87
87
  )
88
88
 
89
89
  logger.debug("reading asset %s and indexes %s ...", asset, indexes)
90
- data = read_raster(
90
+ array = read_raster(
91
91
  inp=path,
92
92
  indexes=indexes,
93
93
  grid=grid,
94
94
  resampling=resampling.name,
95
- dst_nodata=stac_raster_bands.nodata,
96
- ).data
97
-
98
- if apply_offset and stac_raster_bands.offset:
99
- data_type = stac_raster_bands.data_type or data.dtype
95
+ dst_nodata=band_properties.nodata,
96
+ ).array
97
+ if apply_offset and band_properties.offset:
98
+ logger.debug(
99
+ "apply offset %s and scale %s to asset %s",
100
+ band_properties.offset,
101
+ band_properties.scale,
102
+ asset,
103
+ )
104
+ data_type = band_properties.data_type or array.dtype
100
105
 
101
106
  # determine value range for the target data_type
102
107
  clip_min, clip_max = dtype_ranges[str(data_type)]
103
108
 
104
109
  # increase minimum clip value to avoid collission with nodata value
105
- if clip_min == stac_raster_bands.nodata:
110
+ if clip_min == band_properties.nodata:
106
111
  clip_min += 1
107
112
 
108
- data[:] = (
113
+ array[~array.mask] = (
109
114
  (
110
- ((data * stac_raster_bands.scale) + stac_raster_bands.offset)
111
- / stac_raster_bands.scale
115
+ ((array[~array.mask] * band_properties.scale) + band_properties.offset)
116
+ / band_properties.scale
112
117
  )
113
118
  .round()
114
119
  .clip(clip_min, clip_max)
115
120
  .astype(data_type, copy=False)
116
121
  .data
117
122
  )
118
-
119
- return data
123
+ return array
120
124
 
121
125
 
122
126
  def get_assets(