mapchete-eo 2025.10.0__py2.py3-none-any.whl → 2025.11.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. mapchete_eo/__init__.py +1 -1
  2. mapchete_eo/array/convert.py +7 -1
  3. mapchete_eo/base.py +123 -55
  4. mapchete_eo/cli/options_arguments.py +11 -27
  5. mapchete_eo/cli/s2_brdf.py +1 -1
  6. mapchete_eo/cli/s2_cat_results.py +4 -20
  7. mapchete_eo/cli/s2_find_broken_products.py +4 -20
  8. mapchete_eo/cli/s2_jp2_static_catalog.py +2 -2
  9. mapchete_eo/cli/static_catalog.py +4 -45
  10. mapchete_eo/eostac.py +1 -1
  11. mapchete_eo/io/assets.py +7 -7
  12. mapchete_eo/io/items.py +37 -22
  13. mapchete_eo/io/levelled_cubes.py +66 -35
  14. mapchete_eo/io/path.py +19 -8
  15. mapchete_eo/io/products.py +37 -27
  16. mapchete_eo/platforms/sentinel2/__init__.py +1 -1
  17. mapchete_eo/platforms/sentinel2/_mapper_registry.py +89 -0
  18. mapchete_eo/platforms/sentinel2/brdf/correction.py +1 -1
  19. mapchete_eo/platforms/sentinel2/brdf/hls.py +1 -1
  20. mapchete_eo/platforms/sentinel2/brdf/models.py +1 -1
  21. mapchete_eo/platforms/sentinel2/brdf/protocols.py +1 -1
  22. mapchete_eo/platforms/sentinel2/brdf/ross_thick.py +1 -1
  23. mapchete_eo/platforms/sentinel2/brdf/sun_angle_arrays.py +1 -1
  24. mapchete_eo/platforms/sentinel2/config.py +73 -13
  25. mapchete_eo/platforms/sentinel2/driver.py +0 -39
  26. mapchete_eo/platforms/sentinel2/metadata_parser/__init__.py +6 -0
  27. mapchete_eo/platforms/sentinel2/{path_mappers → metadata_parser}/base.py +1 -1
  28. mapchete_eo/platforms/sentinel2/{path_mappers/metadata_xml.py → metadata_parser/default_path_mapper.py} +2 -2
  29. mapchete_eo/platforms/sentinel2/metadata_parser/models.py +78 -0
  30. mapchete_eo/platforms/sentinel2/{metadata_parser.py → metadata_parser/s2metadata.py} +51 -146
  31. mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py +57 -0
  32. mapchete_eo/platforms/sentinel2/preconfigured_sources/guessers.py +108 -0
  33. mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py +171 -0
  34. mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py +217 -0
  35. mapchete_eo/platforms/sentinel2/preprocessing_tasks.py +22 -1
  36. mapchete_eo/platforms/sentinel2/processing_baseline.py +3 -0
  37. mapchete_eo/platforms/sentinel2/product.py +88 -23
  38. mapchete_eo/platforms/sentinel2/source.py +114 -0
  39. mapchete_eo/platforms/sentinel2/types.py +5 -0
  40. mapchete_eo/processes/merge_rasters.py +7 -3
  41. mapchete_eo/product.py +14 -9
  42. mapchete_eo/protocols.py +5 -0
  43. mapchete_eo/search/__init__.py +3 -3
  44. mapchete_eo/search/base.py +126 -100
  45. mapchete_eo/search/config.py +25 -4
  46. mapchete_eo/search/s2_mgrs.py +8 -9
  47. mapchete_eo/search/stac_search.py +111 -75
  48. mapchete_eo/search/stac_static.py +63 -94
  49. mapchete_eo/search/utm_search.py +39 -48
  50. mapchete_eo/settings.py +1 -0
  51. mapchete_eo/sort.py +16 -2
  52. mapchete_eo/source.py +107 -0
  53. {mapchete_eo-2025.10.0.dist-info → mapchete_eo-2025.11.0.dist-info}/METADATA +2 -1
  54. mapchete_eo-2025.11.0.dist-info/RECORD +89 -0
  55. {mapchete_eo-2025.10.0.dist-info → mapchete_eo-2025.11.0.dist-info}/entry_points.txt +1 -1
  56. mapchete_eo/archives/__init__.py +0 -0
  57. mapchete_eo/archives/base.py +0 -65
  58. mapchete_eo/geometry.py +0 -271
  59. mapchete_eo/known_catalogs.py +0 -42
  60. mapchete_eo/platforms/sentinel2/archives.py +0 -190
  61. mapchete_eo/platforms/sentinel2/path_mappers/__init__.py +0 -29
  62. mapchete_eo/platforms/sentinel2/path_mappers/earthsearch.py +0 -34
  63. mapchete_eo/platforms/sentinel2/path_mappers/sinergise.py +0 -105
  64. mapchete_eo-2025.10.0.dist-info/RECORD +0 -88
  65. {mapchete_eo-2025.10.0.dist-info → mapchete_eo-2025.11.0.dist-info}/WHEEL +0 -0
  66. {mapchete_eo-2025.10.0.dist-info → mapchete_eo-2025.11.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,19 +1,20 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from datetime import datetime
3
5
  from functools import cached_property
4
- from typing import Any, Callable, Dict, Generator, Iterator, List, Optional, Set, Union
6
+ from typing import Any, Dict, Generator, Iterator, List, Optional, Set, Union
5
7
 
6
8
  from mapchete import Timer
7
- from mapchete.path import MPathLike
8
9
  from mapchete.tile import BufferedTilePyramid
9
10
  from mapchete.types import Bounds, BoundsLike
10
11
  from pystac import Item
11
- from pystac_client import Client
12
- from shapely.geometry import shape
12
+ from pystac_client import Client, CollectionClient, ItemSearch
13
+ from shapely.geometry import shape, box
13
14
  from shapely.geometry.base import BaseGeometry
14
15
 
15
16
  from mapchete_eo.product import blacklist_products
16
- from mapchete_eo.search.base import CatalogSearcher, StaticCatalogWriterMixin
17
+ from mapchete_eo.search.base import CollectionSearcher, StaticCollectionWriterMixin
17
18
  from mapchete_eo.search.config import StacSearchConfig
18
19
  from mapchete_eo.settings import mapchete_eo_settings
19
20
  from mapchete_eo.types import TimeRange
@@ -21,8 +22,8 @@ from mapchete_eo.types import TimeRange
21
22
  logger = logging.getLogger(__name__)
22
23
 
23
24
 
24
- class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
25
- endpoint: str
25
+ class STACSearchCollection(StaticCollectionWriterMixin, CollectionSearcher):
26
+ collection: str
26
27
  blacklist: Set[str] = (
27
28
  blacklist_products(mapchete_eo_settings.blacklist)
28
29
  if mapchete_eo_settings.blacklist
@@ -30,49 +31,66 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
30
31
  )
31
32
  config_cls = StacSearchConfig
32
33
 
33
- def __init__(
34
- self,
35
- collections: Optional[List[str]] = None,
36
- stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None,
37
- endpoint: Optional[MPathLike] = None,
38
- ):
39
- if collections:
40
- self.collections = collections
34
+ @cached_property
35
+ def client(self) -> CollectionClient:
36
+ return CollectionClient.from_file(self.collection)
37
+
38
+ @cached_property
39
+ def eo_bands(self) -> List[str]:
40
+ item_assets = self.client.extra_fields.get("item_assets", {})
41
+ for v in item_assets.values():
42
+ if "eo:bands" in v and "data" in v.get("roles", []):
43
+ return ["eo:bands"]
41
44
  else: # pragma: no cover
42
- raise ValueError("collections must be given")
43
- self.client = Client.open(endpoint or self.endpoint)
44
- self.id = self.client.id
45
- self.description = self.client.description
46
- self.stac_extensions = self.client.stac_extensions
47
- self.eo_bands = self._eo_bands()
48
- self.stac_item_modifiers = stac_item_modifiers
45
+ logger.debug("cannot find eo:bands definition from collections")
46
+ return []
47
+
48
+ @cached_property
49
+ def id(self) -> str:
50
+ return self.client.id
51
+
52
+ @cached_property
53
+ def description(self) -> str:
54
+ return self.client.description
55
+
56
+ @cached_property
57
+ def stac_extensions(self) -> List[str]:
58
+ return self.client.stac_extensions
49
59
 
50
60
  def search(
51
61
  self,
52
62
  time: Optional[Union[TimeRange, List[TimeRange]]] = None,
53
63
  bounds: Optional[BoundsLike] = None,
54
64
  area: Optional[BaseGeometry] = None,
65
+ query: Optional[str] = None,
55
66
  search_kwargs: Optional[Dict[str, Any]] = None,
56
67
  ) -> Generator[Item, None, None]:
57
68
  config = self.config_cls(**search_kwargs or {})
58
69
  if bounds:
59
70
  bounds = Bounds.from_inp(bounds)
60
- if time is None: # pragma: no cover
61
- raise ValueError("time must be set")
62
71
  if area is None and bounds is None: # pragma: no cover
63
72
  raise ValueError("either bounds or area have to be given")
64
73
 
65
74
  if area is not None and area.is_empty: # pragma: no cover
66
75
  return
67
76
 
68
- def _searches():
69
- for time_range in time if isinstance(time, list) else [time]:
77
+ def _searches() -> Generator[ItemSearch, None, None]:
78
+ def _search_chunks(
79
+ time_range: Optional[TimeRange] = None,
80
+ bounds: Optional[BoundsLike] = None,
81
+ area: Optional[BaseGeometry] = None,
82
+ query: Optional[str] = None,
83
+ ):
70
84
  search = self._search(
71
- time_range=time_range, bounds=bounds, area=area, config=config
85
+ time_range=time_range,
86
+ bounds=bounds,
87
+ area=box(*area.bounds) if area else None,
88
+ query=query,
89
+ config=config,
72
90
  )
73
91
  logger.debug("found %s products", search.matched())
74
92
  matched = search.matched() or 0
75
- if matched > config.catalog_chunk_threshold:
93
+ if matched > config.catalog_chunk_threshold: # pragma: no cover
76
94
  spatial_search_chunks = SpatialSearchChunks(
77
95
  bounds=bounds,
78
96
  area=area,
@@ -88,6 +106,7 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
88
106
  with Timer() as duration:
89
107
  chunk_search = self._search(
90
108
  time_range=time_range,
109
+ query=query,
91
110
  config=config,
92
111
  **chunk_kwargs,
93
112
  )
@@ -102,48 +121,60 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
102
121
  else:
103
122
  yield search
104
123
 
124
+ if time:
125
+ # search time range(s)
126
+ for time_range in time if isinstance(time, list) else [time]:
127
+ yield from _search_chunks(
128
+ time_range=time_range,
129
+ bounds=bounds,
130
+ area=area,
131
+ query=query,
132
+ )
133
+ else:
134
+ # don't apply temporal filter
135
+ yield from _search_chunks(
136
+ bounds=bounds,
137
+ area=area,
138
+ query=query,
139
+ )
140
+
105
141
  for search in _searches():
106
- for count, item in enumerate(search.items(), 1):
107
- item_path = item.get_self_href()
108
- # logger.debug("item %s/%s ...", count, search.matched())
109
- if item_path in self.blacklist: # pragma: no cover
110
- logger.debug("item %s found in blacklist and skipping", item_path)
111
- else:
112
- yield item
113
-
114
- def _eo_bands(self) -> List[str]:
115
- for collection_name in self.collections:
116
- collection = self.client.get_collection(collection_name)
117
- if collection:
118
- item_assets = collection.extra_fields.get("item_assets", {})
119
- for v in item_assets.values():
120
- if "eo:bands" in v and "data" in v.get("roles", []):
121
- return ["eo:bands"]
122
- else: # pragma: no cover
123
- raise ValueError(f"cannot find collection {collection}")
124
- else: # pragma: no cover
125
- logger.debug("cannot find eo:bands definition from collections")
126
- return []
142
+ for item in search.items():
143
+ if item.get_self_href() in self.blacklist: # pragma: no cover
144
+ logger.debug(
145
+ "item %s found in blacklist and skipping", item.get_self_href()
146
+ )
147
+ continue
148
+ yield item
127
149
 
128
150
  @cached_property
129
151
  def default_search_params(self):
130
152
  return {
131
- "collections": self.collections,
153
+ "collections": [self.client],
132
154
  "bbox": None,
133
155
  "intersects": None,
134
156
  }
135
157
 
158
+ @cached_property
159
+ def search_client(self) -> Client:
160
+ # looks weird, right?
161
+ #
162
+ # one would assume that directly returning self.client.get_root() would
163
+ # do the same but if we do so, it seems to ignore the "collections" parameter
164
+ # and thus queries all collections available on that search endpoint.
165
+ #
166
+ # the only way to fix this is to instantiate Client from scratch.
167
+ return Client.from_file(self.client.get_root().self_href)
168
+
136
169
  def _search(
137
170
  self,
138
171
  time_range: Optional[TimeRange] = None,
139
172
  bounds: Optional[Bounds] = None,
140
173
  area: Optional[BaseGeometry] = None,
174
+ query: Optional[str] = None,
141
175
  config: StacSearchConfig = StacSearchConfig(),
142
176
  **kwargs,
143
- ):
144
- if time_range is None: # pragma: no cover
145
- raise ValueError("time_range not provided")
146
-
177
+ ) -> ItemSearch:
147
178
  if bounds is not None:
148
179
  if shape(bounds).is_empty: # pragma: no cover
149
180
  raise ValueError("bounds empty")
@@ -153,22 +184,29 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
153
184
  raise ValueError("area empty")
154
185
  kwargs.update(intersects=area)
155
186
 
156
- start = (
157
- time_range.start.date()
158
- if isinstance(time_range.start, datetime)
159
- else time_range.start
160
- )
161
- end = (
162
- time_range.end.date()
163
- if isinstance(time_range.end, datetime)
164
- else time_range.end
165
- )
166
- search_params = dict(
167
- self.default_search_params,
168
- datetime=f"{start}/{end}",
169
- query=[f"eo:cloud_cover<={config.max_cloud_cover}"],
170
- **kwargs,
171
- )
187
+ if time_range:
188
+ start = (
189
+ time_range.start.date()
190
+ if isinstance(time_range.start, datetime)
191
+ else time_range.start
192
+ )
193
+ end = (
194
+ time_range.end.date()
195
+ if isinstance(time_range.end, datetime)
196
+ else time_range.end
197
+ )
198
+ search_params = dict(
199
+ self.default_search_params,
200
+ datetime=f"{start}/{end}",
201
+ query=[query] if query else None,
202
+ **kwargs,
203
+ )
204
+ else:
205
+ search_params = dict(
206
+ self.default_search_params,
207
+ query=[query] if query else None,
208
+ **kwargs,
209
+ )
172
210
  if (
173
211
  bounds is None
174
212
  and area is None
@@ -177,14 +215,12 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
177
215
  raise ValueError("no bounds or area given")
178
216
  logger.debug("query catalog using params: %s", search_params)
179
217
  with Timer() as duration:
180
- result = self.client.search(**search_params, limit=config.catalog_pagesize)
218
+ result = self.search_client.search(
219
+ **search_params, limit=config.catalog_pagesize
220
+ )
181
221
  logger.debug("query took %s", str(duration))
182
222
  return result
183
223
 
184
- def get_collections(self):
185
- for collection_name in self.collections:
186
- yield self.client.get_collection(collection_name)
187
-
188
224
 
189
225
  class SpatialSearchChunks:
190
226
  bounds: Bounds
@@ -1,21 +1,21 @@
1
+ from functools import cached_property
1
2
  import logging
2
3
  import warnings
3
- from typing import Any, Callable, Dict, Generator, List, Optional, Union
4
+ from typing import Any, Dict, Generator, List, Optional, Union
4
5
 
5
6
  from mapchete import Bounds
6
7
  from mapchete.types import BoundsLike
7
8
  from pystac import Item, Catalog, Collection
8
9
  from mapchete.io.vector import bounds_intersect
9
- from mapchete.path import MPathLike
10
10
  from pystac.stac_io import StacIO
11
- from pystac_client import Client
11
+ from pystac_client import CollectionClient
12
12
  from shapely.geometry import shape
13
13
  from shapely.geometry.base import BaseGeometry
14
14
 
15
15
  from mapchete_eo.search.base import (
16
- CatalogSearcher,
16
+ CollectionSearcher,
17
17
  FSSpecStacIO,
18
- StaticCatalogWriterMixin,
18
+ StaticCollectionWriterMixin,
19
19
  filter_items,
20
20
  )
21
21
  from mapchete_eo.search.config import StacStaticConfig
@@ -28,37 +28,67 @@ logger = logging.getLogger(__name__)
28
28
  StacIO.set_default(FSSpecStacIO)
29
29
 
30
30
 
31
- class STACStaticCatalog(StaticCatalogWriterMixin, CatalogSearcher):
31
+ class STACStaticCollection(StaticCollectionWriterMixin, CollectionSearcher):
32
32
  config_cls = StacStaticConfig
33
33
 
34
- def __init__(
35
- self,
36
- baseurl: MPathLike,
37
- stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None,
38
- ):
39
- self.client = Client.from_file(str(baseurl), stac_io=FSSpecStacIO())
40
- self.id = self.client.id
41
- self.description = self.client.description
42
- self.stac_extensions = self.client.stac_extensions
43
- self.collections = [c.id for c in self.client.get_children()]
44
- self.eo_bands = self._eo_bands()
45
- self.stac_item_modifiers = stac_item_modifiers
34
+ @cached_property
35
+ def client(self) -> CollectionClient:
36
+ return CollectionClient.from_file(str(self.collection), stac_io=FSSpecStacIO())
37
+
38
+ @cached_property
39
+ def eo_bands(self) -> List[str]:
40
+ eo_bands = self.client.extra_fields.get("properties", {}).get("eo:bands")
41
+ if eo_bands:
42
+ return eo_bands
43
+ else:
44
+ warnings.warn(
45
+ "Unable to read eo:bands definition from collection. "
46
+ "Trying now to get information from assets ..."
47
+ )
48
+ # see if eo:bands can be found in properties
49
+ try:
50
+ item = next(self.client.get_items(recursive=True))
51
+ eo_bands = item.properties.get("eo:bands")
52
+ if eo_bands:
53
+ return eo_bands
54
+
55
+ # look through the assets and collect eo:bands
56
+ out = {}
57
+ for asset in item.assets.values():
58
+ for eo_band in asset.extra_fields.get("eo:bands", []):
59
+ out[eo_band["name"]] = eo_band
60
+ if out:
61
+ return [v for v in out.values()]
62
+ except StopIteration:
63
+ pass
64
+
65
+ logger.debug("cannot find eo:bands definition")
66
+ return []
67
+
68
+ @cached_property
69
+ def id(self) -> str:
70
+ return self.client.id
71
+
72
+ @cached_property
73
+ def description(self) -> str:
74
+ return self.client.description
75
+
76
+ @cached_property
77
+ def stac_extensions(self) -> List[str]:
78
+ return self.client.stac_extensions
46
79
 
47
80
  def search(
48
81
  self,
49
82
  time: Optional[Union[TimeRange, List[TimeRange]]] = None,
50
83
  bounds: Optional[BoundsLike] = None,
51
84
  area: Optional[BaseGeometry] = None,
85
+ query: Optional[str] = None,
52
86
  search_kwargs: Optional[Dict[str, Any]] = None,
53
87
  ) -> Generator[Item, None, None]:
54
- config = self.config_cls(**search_kwargs or {})
55
88
  if area is None and bounds:
56
89
  bounds = Bounds.from_inp(bounds)
57
90
  area = shape(bounds)
58
- for item in filter_items(
59
- self._raw_search(time=time, area=area),
60
- max_cloud_cover=config.max_cloud_cover,
61
- ):
91
+ for item in filter_items(self._raw_search(time=time, area=area), query=query):
62
92
  yield item
63
93
 
64
94
  def _raw_search(
@@ -69,83 +99,22 @@ class STACStaticCatalog(StaticCatalogWriterMixin, CatalogSearcher):
69
99
  if area is not None and area.is_empty:
70
100
  return
71
101
  logger.debug("iterate through children")
72
- for collection in self.client.get_collections():
73
- if time:
74
- for time_range in time if isinstance(time, list) else [time]:
75
- for item in _all_intersecting_items(
76
- collection,
77
- area=area,
78
- time_range=time_range,
79
- ):
80
- item.make_asset_hrefs_absolute()
81
- yield item
82
- else:
102
+ if time:
103
+ for time_range in time if isinstance(time, list) else [time]:
83
104
  for item in _all_intersecting_items(
84
- collection,
105
+ self.client,
85
106
  area=area,
107
+ time_range=time_range,
86
108
  ):
87
109
  item.make_asset_hrefs_absolute()
88
110
  yield item
89
-
90
- def _eo_bands(self) -> List[str]:
91
- for collection in self.client.get_children():
92
- eo_bands = collection.extra_fields.get("properties", {}).get("eo:bands")
93
- if eo_bands:
94
- return eo_bands
95
111
  else:
96
- warnings.warn(
97
- "Unable to read eo:bands definition from collections. "
98
- "Trying now to get information from assets ..."
99
- )
100
-
101
- # see if eo:bands can be found in properties
102
- item = _get_first_item(self.client.get_children())
103
- eo_bands = item.properties.get("eo:bands")
104
- if eo_bands:
105
- return eo_bands
106
-
107
- # look through the assets and collect eo:bands
108
- out = {}
109
- for asset in item.assets.values():
110
- for eo_band in asset.extra_fields.get("eo:bands", []):
111
- out[eo_band["name"]] = eo_band
112
- if out:
113
- return [v for v in out.values()]
114
-
115
- logger.debug("cannot find eo:bands definition")
116
- return []
117
-
118
- def get_collections(
119
- self,
120
- time: Optional[Union[TimeRange, List[TimeRange]]] = None,
121
- bounds: Optional[BoundsLike] = None,
122
- area: Optional[BaseGeometry] = None,
123
- ):
124
- if area is None and bounds is not None:
125
- area = Bounds.from_inp(bounds).geometry
126
- for collection in self.client.get_children():
127
- if time:
128
- for time_range in time if isinstance(time, list) else [time]:
129
- if _collection_extent_intersects(
130
- collection,
131
- area=area,
132
- time_range=time_range,
133
- ):
134
- yield collection
135
- else:
136
- if _collection_extent_intersects(collection, area=area):
137
- yield collection
138
-
139
-
140
- def _get_first_item(collections):
141
- for collection in collections:
142
- for item in collection.get_all_items():
143
- return item
144
- else:
145
- for child in collection.get_children():
146
- return _get_first_item(child)
147
- else:
148
- raise ValueError("collections contain no items")
112
+ for item in _all_intersecting_items(
113
+ self.client,
114
+ area=area,
115
+ ):
116
+ item.make_asset_hrefs_absolute()
117
+ yield item
149
118
 
150
119
 
151
120
  def _all_intersecting_items(
@@ -1,6 +1,7 @@
1
1
  import datetime
2
+ from functools import cached_property
2
3
  import logging
3
- from typing import Any, Callable, Dict, Generator, List, Optional, Set, Union
4
+ from typing import Any, Dict, Generator, List, Optional, Set, Union
4
5
 
5
6
  from mapchete.io.vector import fiona_open
6
7
  from mapchete.path import MPath, MPathLike
@@ -14,8 +15,8 @@ from shapely.geometry.base import BaseGeometry
14
15
  from mapchete_eo.exceptions import ItemGeometryError
15
16
  from mapchete_eo.product import blacklist_products
16
17
  from mapchete_eo.search.base import (
17
- CatalogSearcher,
18
- StaticCatalogWriterMixin,
18
+ CollectionSearcher,
19
+ StaticCollectionWriterMixin,
19
20
  filter_items,
20
21
  )
21
22
  from mapchete_eo.search.config import UTMSearchConfig
@@ -27,7 +28,7 @@ from mapchete_eo.types import TimeRange
27
28
  logger = logging.getLogger(__name__)
28
29
 
29
30
 
30
- class UTMSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
31
+ class UTMSearchCatalog(StaticCollectionWriterMixin, CollectionSearcher):
31
32
  endpoint: str
32
33
  id: str
33
34
  day_subdir_schema: str
@@ -41,33 +42,41 @@ class UTMSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
41
42
  )
42
43
  config_cls = UTMSearchConfig
43
44
 
44
- def __init__(
45
- self,
46
- endpoint: Optional[MPathLike] = None,
47
- collections: List[str] = [],
48
- stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None,
49
- ):
50
- self.endpoint = endpoint or self.endpoint
51
- if len(collections) == 0: # pragma: no cover
52
- raise ValueError("no collections provided")
53
- self.collections = collections
54
- self.eo_bands = self._eo_bands()
55
- self.stac_item_modifiers = stac_item_modifiers
45
+ @cached_property
46
+ def eo_bands(self) -> List[str]: # pragma: no cover
47
+ for (
48
+ collection_properties
49
+ ) in UTMSearchConfig().sinergise_aws_collections.values():
50
+ if collection_properties["id"] == self.collection.split("/")[-1]:
51
+ collection = Collection.from_dict(
52
+ collection_properties["path"].read_json()
53
+ )
54
+ if collection:
55
+ summary = collection.summaries.to_dict()
56
+ if "eo:bands" in summary:
57
+ return summary["eo:bands"]
58
+ else:
59
+ raise ValueError(f"cannot find collection {collection}")
60
+ else:
61
+ logger.debug(
62
+ "cannot find eo:bands definition from collection %s",
63
+ self.collection,
64
+ )
65
+ return []
56
66
 
57
67
  def search(
58
68
  self,
59
69
  time: Optional[Union[TimeRange, List[TimeRange]]] = None,
60
70
  bounds: Optional[BoundsLike] = None,
61
71
  area: Optional[BaseGeometry] = None,
72
+ query: Optional[str] = None,
62
73
  search_kwargs: Optional[Dict[str, Any]] = None,
63
74
  ) -> Generator[Item, None, None]:
64
- config = self.config_cls(**search_kwargs or {})
65
- if bounds:
66
- bounds = Bounds.from_inp(bounds)
67
-
68
75
  for item in filter_items(
69
- self._raw_search(time=time, bounds=bounds, area=area),
70
- max_cloud_cover=config.max_cloud_cover,
76
+ self._raw_search(
77
+ time=time, bounds=Bounds.from_inp(bounds) if bounds else None, area=area
78
+ ),
79
+ query=query,
71
80
  ):
72
81
  yield item
73
82
 
@@ -88,7 +97,12 @@ class UTMSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
88
97
  elif bounds is not None:
89
98
  bounds = Bounds.from_inp(bounds)
90
99
  area = shape(bounds)
91
- for time_range in time if isinstance(time, list) else [time]:
100
+
101
+ # Clean up the time list in case None is present as a time (undefined)
102
+ time_list: list[TimeRange] = (
103
+ [t for t in time if t is not None] if isinstance(time, list) else [time]
104
+ )
105
+ for time_range in time_list:
92
106
  start_time = (
93
107
  time_range.start
94
108
  if isinstance(time_range.start, datetime.date)
@@ -147,28 +161,6 @@ class UTMSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
147
161
  elif area.intersects(shape(item.geometry)):
148
162
  yield item
149
163
 
150
- def _eo_bands(self) -> list:
151
- for collection_name in self.collections:
152
- for (
153
- collection_properties
154
- ) in UTMSearchConfig().sinergise_aws_collections.values():
155
- if collection_properties["id"] == collection_name:
156
- collection = Collection.from_dict(
157
- collection_properties["path"].read_json()
158
- )
159
- if collection:
160
- summary = collection.summaries.to_dict()
161
- if "eo:bands" in summary:
162
- return summary["eo:bands"]
163
- else:
164
- raise ValueError(f"cannot find collection {collection}")
165
- else:
166
- logger.debug(
167
- "cannot find eo:bands definition from collections %s",
168
- self.collections,
169
- )
170
- return []
171
-
172
164
  def get_collections(self):
173
165
  """
174
166
  yield transformed collection from:
@@ -178,9 +170,8 @@ class UTMSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
178
170
  """
179
171
  for collection_properties in self.config.sinergise_aws_collections.values():
180
172
  collection = Collection.from_dict(collection_properties["path"].read_json())
181
- for collection_name in self.collections:
182
- if collection_name == collection.id:
183
- yield collection
173
+ if self.collection.split("/")[-1] == collection.id:
174
+ yield collection
184
175
 
185
176
 
186
177
  def items_from_static_index(
mapchete_eo/settings.py CHANGED
@@ -16,6 +16,7 @@ class Settings(BaseSettings):
16
16
  default_cache_location: MPathLike = MPath("s3://eox-mhub-cache/")
17
17
  default_catalog_crs: CRS = CRS.from_epsg(4326)
18
18
  blacklist: Optional[MPathLike] = None
19
+ lazy_load_stac_items: bool = True
19
20
 
20
21
  # read from environment
21
22
  model_config = SettingsConfigDict(env_prefix="MAPCHETE_EO_")
mapchete_eo/sort.py CHANGED
@@ -6,7 +6,7 @@ from typing import Callable, List, Optional
6
6
 
7
7
  from pydantic import BaseModel
8
8
 
9
- from mapchete_eo.protocols import DateTimeProtocol
9
+ from mapchete_eo.protocols import DateTimeProtocol, GetPropertyProtocol
10
10
  from mapchete_eo.time import timedelta, to_datetime
11
11
  from mapchete_eo.types import DateTimeLike
12
12
 
@@ -22,7 +22,7 @@ def sort_objects_by_target_date(
22
22
  **kwargs,
23
23
  ) -> List[DateTimeProtocol]:
24
24
  """
25
- Return sorted list of onjects according to their distance to the target_date.
25
+ Return sorted list of objects according to their distance to the target_date.
26
26
 
27
27
  Default for target date is the middle between the objects start date and end date.
28
28
  """
@@ -46,3 +46,17 @@ class TargetDateSort(SortMethodConfig):
46
46
  func: Callable = sort_objects_by_target_date
47
47
  target_date: Optional[DateTimeLike] = None
48
48
  reverse: bool = False
49
+
50
+
51
+ def sort_objects_by_cloud_cover(
52
+ objects: List[GetPropertyProtocol], reverse: bool = False
53
+ ) -> List[GetPropertyProtocol]:
54
+ if len(objects) == 0: # pragma: no cover
55
+ return objects
56
+ objects.sort(key=lambda x: x.get_property("eo:cloud_cover"), reverse=reverse)
57
+ return objects
58
+
59
+
60
+ class CloudCoverSort(SortMethodConfig):
61
+ func: Callable = sort_objects_by_cloud_cover
62
+ reverse: bool = False