mapchete-eo 2025.10.1__py2.py3-none-any.whl → 2026.1.0__py2.py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
Files changed (63)
  1. mapchete_eo/__init__.py +1 -1
  2. mapchete_eo/base.py +94 -54
  3. mapchete_eo/cli/options_arguments.py +11 -27
  4. mapchete_eo/cli/s2_brdf.py +1 -1
  5. mapchete_eo/cli/s2_cat_results.py +4 -20
  6. mapchete_eo/cli/s2_find_broken_products.py +4 -20
  7. mapchete_eo/cli/s2_jp2_static_catalog.py +2 -2
  8. mapchete_eo/cli/static_catalog.py +4 -45
  9. mapchete_eo/eostac.py +1 -1
  10. mapchete_eo/io/assets.py +20 -16
  11. mapchete_eo/io/items.py +36 -23
  12. mapchete_eo/io/path.py +19 -8
  13. mapchete_eo/io/products.py +22 -24
  14. mapchete_eo/platforms/sentinel2/__init__.py +1 -1
  15. mapchete_eo/platforms/sentinel2/_mapper_registry.py +89 -0
  16. mapchete_eo/platforms/sentinel2/brdf/correction.py +1 -1
  17. mapchete_eo/platforms/sentinel2/brdf/hls.py +1 -1
  18. mapchete_eo/platforms/sentinel2/brdf/models.py +1 -1
  19. mapchete_eo/platforms/sentinel2/brdf/protocols.py +1 -1
  20. mapchete_eo/platforms/sentinel2/brdf/ross_thick.py +1 -1
  21. mapchete_eo/platforms/sentinel2/brdf/sun_angle_arrays.py +1 -1
  22. mapchete_eo/platforms/sentinel2/config.py +73 -13
  23. mapchete_eo/platforms/sentinel2/driver.py +0 -39
  24. mapchete_eo/platforms/sentinel2/metadata_parser/__init__.py +6 -0
  25. mapchete_eo/platforms/sentinel2/{path_mappers → metadata_parser}/base.py +1 -1
  26. mapchete_eo/platforms/sentinel2/{path_mappers/metadata_xml.py → metadata_parser/default_path_mapper.py} +2 -2
  27. mapchete_eo/platforms/sentinel2/metadata_parser/models.py +78 -0
  28. mapchete_eo/platforms/sentinel2/{metadata_parser.py → metadata_parser/s2metadata.py} +51 -144
  29. mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py +57 -0
  30. mapchete_eo/platforms/sentinel2/preconfigured_sources/guessers.py +108 -0
  31. mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py +171 -0
  32. mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py +217 -0
  33. mapchete_eo/platforms/sentinel2/preprocessing_tasks.py +22 -1
  34. mapchete_eo/platforms/sentinel2/processing_baseline.py +3 -0
  35. mapchete_eo/platforms/sentinel2/product.py +83 -18
  36. mapchete_eo/platforms/sentinel2/source.py +114 -0
  37. mapchete_eo/platforms/sentinel2/types.py +5 -0
  38. mapchete_eo/product.py +14 -8
  39. mapchete_eo/protocols.py +5 -0
  40. mapchete_eo/search/__init__.py +3 -3
  41. mapchete_eo/search/base.py +127 -99
  42. mapchete_eo/search/config.py +75 -4
  43. mapchete_eo/search/s2_mgrs.py +8 -9
  44. mapchete_eo/search/stac_search.py +99 -97
  45. mapchete_eo/search/stac_static.py +46 -102
  46. mapchete_eo/search/utm_search.py +54 -62
  47. mapchete_eo/settings.py +1 -0
  48. mapchete_eo/sort.py +4 -6
  49. mapchete_eo/source.py +107 -0
  50. {mapchete_eo-2025.10.1.dist-info → mapchete_eo-2026.1.0.dist-info}/METADATA +4 -3
  51. mapchete_eo-2026.1.0.dist-info/RECORD +89 -0
  52. {mapchete_eo-2025.10.1.dist-info → mapchete_eo-2026.1.0.dist-info}/WHEEL +1 -1
  53. {mapchete_eo-2025.10.1.dist-info → mapchete_eo-2026.1.0.dist-info}/entry_points.txt +1 -1
  54. {mapchete_eo-2025.10.1.dist-info → mapchete_eo-2026.1.0.dist-info}/licenses/LICENSE +1 -1
  55. mapchete_eo/archives/__init__.py +0 -0
  56. mapchete_eo/archives/base.py +0 -65
  57. mapchete_eo/geometry.py +0 -271
  58. mapchete_eo/known_catalogs.py +0 -42
  59. mapchete_eo/platforms/sentinel2/archives.py +0 -190
  60. mapchete_eo/platforms/sentinel2/path_mappers/__init__.py +0 -29
  61. mapchete_eo/platforms/sentinel2/path_mappers/earthsearch.py +0 -34
  62. mapchete_eo/platforms/sentinel2/path_mappers/sinergise.py +0 -105
  63. mapchete_eo-2025.10.1.dist-info/RECORD +0 -88
@@ -1,95 +1,77 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from datetime import datetime
3
5
  from functools import cached_property
4
- from typing import Any, Callable, Dict, Generator, Iterator, List, Optional, Set, Union
6
+ from typing import Any, Dict, Generator, Iterator, List, Optional, Union
5
7
 
6
8
  from mapchete import Timer
7
- from mapchete.path import MPathLike
8
9
  from mapchete.tile import BufferedTilePyramid
9
10
  from mapchete.types import Bounds, BoundsLike
10
11
  from pystac import Item
11
- from pystac_client import Client
12
- from shapely.geometry import shape
12
+ from pystac_client import Client, CollectionClient, ItemSearch
13
+ from shapely.geometry import shape, box
13
14
  from shapely.geometry.base import BaseGeometry
14
15
 
15
- from mapchete_eo.product import blacklist_products
16
- from mapchete_eo.search.base import CatalogSearcher, StaticCatalogWriterMixin
17
- from mapchete_eo.search.config import StacSearchConfig
18
- from mapchete_eo.settings import mapchete_eo_settings
16
+ from mapchete_eo.search.base import CollectionSearcher, StaticCollectionWriterMixin
17
+ from mapchete_eo.search.config import StacSearchConfig, patch_invalid_assets
19
18
  from mapchete_eo.types import TimeRange
20
19
 
21
20
  logger = logging.getLogger(__name__)
22
21
 
23
22
 
24
- class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
25
- endpoint: str
26
- blacklist: Set[str] = (
27
- blacklist_products(mapchete_eo_settings.blacklist)
28
- if mapchete_eo_settings.blacklist
29
- else set()
30
- )
23
+ class STACSearchCollection(StaticCollectionWriterMixin, CollectionSearcher):
24
+ collection: str
31
25
  config_cls = StacSearchConfig
32
26
 
33
- def __init__(
34
- self,
35
- collections: Optional[List[str]] = None,
36
- stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None,
37
- endpoint: Optional[MPathLike] = None,
38
- ):
39
- if endpoint is not None:
40
- self.endpoint = endpoint
41
- if collections:
42
- self.collections = collections
43
- else: # pragma: no cover
44
- raise ValueError("collections must be given")
45
- self.stac_item_modifiers = stac_item_modifiers
46
-
47
27
  @cached_property
48
- def client(self) -> Client:
49
- return Client.open(self.endpoint)
28
+ def client(self) -> CollectionClient:
29
+ return CollectionClient.from_file(self.collection)
50
30
 
51
31
  @cached_property
52
32
  def eo_bands(self) -> List[str]:
53
- return self._eo_bands()
54
-
55
- @cached_property
56
- def id(self) -> str:
57
- return self.client.id
58
-
59
- @cached_property
60
- def description(self) -> str:
61
- return self.client.description
62
-
63
- @cached_property
64
- def stac_extensions(self) -> List[str]:
65
- return self.client.stac_extensions
33
+ item_assets = self.client.extra_fields.get("item_assets", {})
34
+ for v in item_assets.values():
35
+ if "eo:bands" in v and "data" in v.get("roles", []):
36
+ return ["eo:bands"]
37
+ else: # pragma: no cover
38
+ logger.debug("cannot find eo:bands definition from collections")
39
+ return []
66
40
 
67
41
  def search(
68
42
  self,
69
43
  time: Optional[Union[TimeRange, List[TimeRange]]] = None,
70
44
  bounds: Optional[BoundsLike] = None,
71
45
  area: Optional[BaseGeometry] = None,
46
+ query: Optional[str] = None,
72
47
  search_kwargs: Optional[Dict[str, Any]] = None,
73
48
  ) -> Generator[Item, None, None]:
74
49
  config = self.config_cls(**search_kwargs or {})
75
50
  if bounds:
76
51
  bounds = Bounds.from_inp(bounds)
77
- if time is None: # pragma: no cover
78
- raise ValueError("time must be set")
79
52
  if area is None and bounds is None: # pragma: no cover
80
53
  raise ValueError("either bounds or area have to be given")
81
54
 
82
55
  if area is not None and area.is_empty: # pragma: no cover
83
56
  return
84
57
 
85
- def _searches():
86
- for time_range in time if isinstance(time, list) else [time]:
58
+ def _searches() -> Generator[ItemSearch, None, None]:
59
+ def _search_chunks(
60
+ time_range: Optional[TimeRange] = None,
61
+ bounds: Optional[BoundsLike] = None,
62
+ area: Optional[BaseGeometry] = None,
63
+ query: Optional[str] = None,
64
+ ):
87
65
  search = self._search(
88
- time_range=time_range, bounds=bounds, area=area, config=config
66
+ time_range=time_range,
67
+ bounds=bounds,
68
+ area=box(*area.bounds) if area else None,
69
+ query=query,
70
+ config=config,
89
71
  )
90
72
  logger.debug("found %s products", search.matched())
91
73
  matched = search.matched() or 0
92
- if matched > config.catalog_chunk_threshold:
74
+ if matched > config.catalog_chunk_threshold: # pragma: no cover
93
75
  spatial_search_chunks = SpatialSearchChunks(
94
76
  bounds=bounds,
95
77
  area=area,
@@ -105,6 +87,7 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
105
87
  with Timer() as duration:
106
88
  chunk_search = self._search(
107
89
  time_range=time_range,
90
+ query=query,
108
91
  config=config,
109
92
  **chunk_kwargs,
110
93
  )
@@ -119,48 +102,62 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
119
102
  else:
120
103
  yield search
121
104
 
122
- for search in _searches():
123
- for count, item in enumerate(search.items(), 1):
124
- item_path = item.get_self_href()
125
- # logger.debug("item %s/%s ...", count, search.matched())
126
- if item_path in self.blacklist: # pragma: no cover
127
- logger.debug("item %s found in blacklist and skipping", item_path)
128
- else:
129
- yield item
105
+ if time:
106
+ # search time range(s)
107
+ for time_range in time if isinstance(time, list) else [time]:
108
+ yield from _search_chunks(
109
+ time_range=time_range,
110
+ bounds=bounds,
111
+ area=area,
112
+ query=query,
113
+ )
114
+ else:
115
+ # don't apply temporal filter
116
+ yield from _search_chunks(
117
+ bounds=bounds,
118
+ area=area,
119
+ query=query,
120
+ )
130
121
 
131
- def _eo_bands(self) -> List[str]:
132
- for collection_name in self.collections:
133
- collection = self.client.get_collection(collection_name)
134
- if collection:
135
- item_assets = collection.extra_fields.get("item_assets", {})
136
- for v in item_assets.values():
137
- if "eo:bands" in v and "data" in v.get("roles", []):
138
- return ["eo:bands"]
139
- else: # pragma: no cover
140
- raise ValueError(f"cannot find collection {collection}")
141
- else: # pragma: no cover
142
- logger.debug("cannot find eo:bands definition from collections")
143
- return []
122
+ with patch_invalid_assets():
123
+ for search in _searches():
124
+ for item in search.items():
125
+ if item.get_self_href() in self.blacklist: # pragma: no cover
126
+ logger.debug(
127
+ "item %s found in blacklist and skipping",
128
+ item.get_self_href(),
129
+ )
130
+ continue
131
+ yield item
144
132
 
145
133
  @cached_property
146
134
  def default_search_params(self):
147
135
  return {
148
- "collections": self.collections,
136
+ "collections": [self.client],
149
137
  "bbox": None,
150
138
  "intersects": None,
151
139
  }
152
140
 
141
+ @cached_property
142
+ def search_client(self) -> Client:
143
+ # looks weird, right?
144
+ #
145
+ # one would assume that directly returning self.client.get_root() would
146
+ # do the same but if we do so, it seems to ignore the "collections" parameter
147
+ # and thus query all collection available on that search endpoint.
148
+ #
149
+ # the only way to fix this, is to instantiate Client from scratch.
150
+ return Client.from_file(self.client.get_root().self_href)
151
+
153
152
  def _search(
154
153
  self,
155
154
  time_range: Optional[TimeRange] = None,
156
155
  bounds: Optional[Bounds] = None,
157
156
  area: Optional[BaseGeometry] = None,
157
+ query: Optional[str] = None,
158
158
  config: StacSearchConfig = StacSearchConfig(),
159
159
  **kwargs,
160
- ):
161
- if time_range is None: # pragma: no cover
162
- raise ValueError("time_range not provided")
163
-
160
+ ) -> ItemSearch:
164
161
  if bounds is not None:
165
162
  if shape(bounds).is_empty: # pragma: no cover
166
163
  raise ValueError("bounds empty")
@@ -170,22 +167,29 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
170
167
  raise ValueError("area empty")
171
168
  kwargs.update(intersects=area)
172
169
 
173
- start = (
174
- time_range.start.date()
175
- if isinstance(time_range.start, datetime)
176
- else time_range.start
177
- )
178
- end = (
179
- time_range.end.date()
180
- if isinstance(time_range.end, datetime)
181
- else time_range.end
182
- )
183
- search_params = dict(
184
- self.default_search_params,
185
- datetime=f"{start}/{end}",
186
- query=[f"eo:cloud_cover<={config.max_cloud_cover}"],
187
- **kwargs,
188
- )
170
+ if time_range:
171
+ start = (
172
+ time_range.start.date()
173
+ if isinstance(time_range.start, datetime)
174
+ else time_range.start
175
+ )
176
+ end = (
177
+ time_range.end.date()
178
+ if isinstance(time_range.end, datetime)
179
+ else time_range.end
180
+ )
181
+ search_params = dict(
182
+ self.default_search_params,
183
+ datetime=f"{start}/{end}",
184
+ query=[query] if query else None,
185
+ **kwargs,
186
+ )
187
+ else:
188
+ search_params = dict(
189
+ self.default_search_params,
190
+ query=[query] if query else None,
191
+ **kwargs,
192
+ )
189
193
  if (
190
194
  bounds is None
191
195
  and area is None
@@ -194,14 +198,12 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
194
198
  raise ValueError("no bounds or area given")
195
199
  logger.debug("query catalog using params: %s", search_params)
196
200
  with Timer() as duration:
197
- result = self.client.search(**search_params, limit=config.catalog_pagesize)
201
+ result = self.search_client.search(
202
+ **search_params, limit=config.catalog_pagesize
203
+ )
198
204
  logger.debug("query took %s", str(duration))
199
205
  return result
200
206
 
201
- def get_collections(self):
202
- for collection_name in self.collections:
203
- yield self.client.get_collection(collection_name)
204
-
205
207
 
206
208
  class SpatialSearchChunks:
207
209
  bounds: Bounds
@@ -1,22 +1,21 @@
1
1
  from functools import cached_property
2
2
  import logging
3
3
  import warnings
4
- from typing import Any, Callable, Dict, Generator, List, Optional, Union
4
+ from typing import Any, Dict, Generator, List, Optional, Union
5
5
 
6
6
  from mapchete import Bounds
7
7
  from mapchete.types import BoundsLike
8
8
  from pystac import Item, Catalog, Collection
9
9
  from mapchete.io.vector import bounds_intersect
10
- from mapchete.path import MPathLike
11
10
  from pystac.stac_io import StacIO
12
- from pystac_client import Client
11
+ from pystac_client import CollectionClient
13
12
  from shapely.geometry import shape
14
13
  from shapely.geometry.base import BaseGeometry
15
14
 
16
15
  from mapchete_eo.search.base import (
17
- CatalogSearcher,
16
+ CollectionSearcher,
18
17
  FSSpecStacIO,
19
- StaticCatalogWriterMixin,
18
+ StaticCollectionWriterMixin,
20
19
  filter_items,
21
20
  )
22
21
  from mapchete_eo.search.config import StacStaticConfig
@@ -29,49 +28,55 @@ logger = logging.getLogger(__name__)
29
28
  StacIO.set_default(FSSpecStacIO)
30
29
 
31
30
 
32
- class STACStaticCatalog(StaticCatalogWriterMixin, CatalogSearcher):
31
+ class STACStaticCollection(StaticCollectionWriterMixin, CollectionSearcher):
33
32
  config_cls = StacStaticConfig
34
33
 
35
- def __init__(
36
- self,
37
- baseurl: MPathLike,
38
- stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None,
39
- ):
40
- self.client = Client.from_file(str(baseurl), stac_io=FSSpecStacIO())
41
- self.collections = [c.id for c in self.client.get_children()]
42
- self.stac_item_modifiers = stac_item_modifiers
43
-
44
- @cached_property
45
- def eo_bands(self) -> List[str]:
46
- return self._eo_bands()
47
-
48
34
  @cached_property
49
- def id(self) -> str:
50
- return self.client.id
35
+ def client(self) -> CollectionClient:
36
+ return CollectionClient.from_file(str(self.collection), stac_io=FSSpecStacIO())
51
37
 
52
38
  @cached_property
53
- def description(self) -> str:
54
- return self.client.description
39
+ def eo_bands(self) -> List[str]:
40
+ eo_bands = self.client.extra_fields.get("properties", {}).get("eo:bands")
41
+ if eo_bands:
42
+ return eo_bands
43
+ else:
44
+ warnings.warn(
45
+ "Unable to read eo:bands definition from collection. "
46
+ "Trying now to get information from assets ..."
47
+ )
48
+ # see if eo:bands can be found in properties
49
+ try:
50
+ item = next(self.client.get_items(recursive=True))
51
+ eo_bands = item.properties.get("eo:bands")
52
+ if eo_bands:
53
+ return eo_bands
54
+
55
+ # look through the assets and collect eo:bands
56
+ out = {}
57
+ for asset in item.assets.values():
58
+ for eo_band in asset.extra_fields.get("eo:bands", []):
59
+ out[eo_band["name"]] = eo_band
60
+ if out:
61
+ return [v for v in out.values()]
62
+ except StopIteration:
63
+ pass
55
64
 
56
- @cached_property
57
- def stac_extensions(self) -> List[str]:
58
- return self.client.stac_extensions
65
+ logger.debug("cannot find eo:bands definition")
66
+ return []
59
67
 
60
68
  def search(
61
69
  self,
62
70
  time: Optional[Union[TimeRange, List[TimeRange]]] = None,
63
71
  bounds: Optional[BoundsLike] = None,
64
72
  area: Optional[BaseGeometry] = None,
73
+ query: Optional[str] = None,
65
74
  search_kwargs: Optional[Dict[str, Any]] = None,
66
75
  ) -> Generator[Item, None, None]:
67
- config = self.config_cls(**search_kwargs or {})
68
76
  if area is None and bounds:
69
77
  bounds = Bounds.from_inp(bounds)
70
78
  area = shape(bounds)
71
- for item in filter_items(
72
- self._raw_search(time=time, area=area),
73
- max_cloud_cover=config.max_cloud_cover,
74
- ):
79
+ for item in filter_items(self._raw_search(time=time, area=area), query=query):
75
80
  yield item
76
81
 
77
82
  def _raw_search(
@@ -82,83 +87,22 @@ class STACStaticCatalog(StaticCatalogWriterMixin, CatalogSearcher):
82
87
  if area is not None and area.is_empty:
83
88
  return
84
89
  logger.debug("iterate through children")
85
- for collection in self.client.get_collections():
86
- if time:
87
- for time_range in time if isinstance(time, list) else [time]:
88
- for item in _all_intersecting_items(
89
- collection,
90
- area=area,
91
- time_range=time_range,
92
- ):
93
- item.make_asset_hrefs_absolute()
94
- yield item
95
- else:
90
+ if time:
91
+ for time_range in time if isinstance(time, list) else [time]:
96
92
  for item in _all_intersecting_items(
97
- collection,
93
+ self.client,
98
94
  area=area,
95
+ time_range=time_range,
99
96
  ):
100
97
  item.make_asset_hrefs_absolute()
101
98
  yield item
102
-
103
- def _eo_bands(self) -> List[str]:
104
- for collection in self.client.get_children():
105
- eo_bands = collection.extra_fields.get("properties", {}).get("eo:bands")
106
- if eo_bands:
107
- return eo_bands
108
99
  else:
109
- warnings.warn(
110
- "Unable to read eo:bands definition from collections. "
111
- "Trying now to get information from assets ..."
112
- )
113
-
114
- # see if eo:bands can be found in properties
115
- item = _get_first_item(self.client.get_children())
116
- eo_bands = item.properties.get("eo:bands")
117
- if eo_bands:
118
- return eo_bands
119
-
120
- # look through the assets and collect eo:bands
121
- out = {}
122
- for asset in item.assets.values():
123
- for eo_band in asset.extra_fields.get("eo:bands", []):
124
- out[eo_band["name"]] = eo_band
125
- if out:
126
- return [v for v in out.values()]
127
-
128
- logger.debug("cannot find eo:bands definition")
129
- return []
130
-
131
- def get_collections(
132
- self,
133
- time: Optional[Union[TimeRange, List[TimeRange]]] = None,
134
- bounds: Optional[BoundsLike] = None,
135
- area: Optional[BaseGeometry] = None,
136
- ):
137
- if area is None and bounds is not None:
138
- area = Bounds.from_inp(bounds).geometry
139
- for collection in self.client.get_children():
140
- if time:
141
- for time_range in time if isinstance(time, list) else [time]:
142
- if _collection_extent_intersects(
143
- collection,
144
- area=area,
145
- time_range=time_range,
146
- ):
147
- yield collection
148
- else:
149
- if _collection_extent_intersects(collection, area=area):
150
- yield collection
151
-
152
-
153
- def _get_first_item(collections):
154
- for collection in collections:
155
- for item in collection.get_all_items():
156
- return item
157
- else:
158
- for child in collection.get_children():
159
- return _get_first_item(child)
160
- else:
161
- raise ValueError("collections contain no items")
100
+ for item in _all_intersecting_items(
101
+ self.client,
102
+ area=area,
103
+ ):
104
+ item.make_asset_hrefs_absolute()
105
+ yield item
162
106
 
163
107
 
164
108
  def _all_intersecting_items(
@@ -1,77 +1,86 @@
1
1
  import datetime
2
2
  from functools import cached_property
3
3
  import logging
4
- from typing import Any, Callable, Dict, Generator, List, Optional, Set, Union
4
+ from typing import Any, Dict, Generator, List, Optional, Union
5
5
 
6
6
  from mapchete.io.vector import fiona_open
7
7
  from mapchete.path import MPath, MPathLike
8
8
  from mapchete.types import Bounds, BoundsLike
9
9
  from pystac.collection import Collection
10
10
  from pystac.item import Item
11
+ from pystac_client import CollectionClient
11
12
  from shapely.errors import GEOSException
12
13
  from shapely.geometry import shape
13
14
  from shapely.geometry.base import BaseGeometry
14
15
 
15
16
  from mapchete_eo.exceptions import ItemGeometryError
16
- from mapchete_eo.product import blacklist_products
17
17
  from mapchete_eo.search.base import (
18
- CatalogSearcher,
19
- StaticCatalogWriterMixin,
18
+ CollectionSearcher,
19
+ StaticCollectionWriterMixin,
20
20
  filter_items,
21
21
  )
22
22
  from mapchete_eo.search.config import UTMSearchConfig
23
23
  from mapchete_eo.search.s2_mgrs import S2Tile, s2_tiles_from_bounds
24
- from mapchete_eo.settings import mapchete_eo_settings
25
24
  from mapchete_eo.time import day_range, to_datetime
26
25
  from mapchete_eo.types import TimeRange
27
26
 
28
27
  logger = logging.getLogger(__name__)
29
28
 
30
29
 
31
- class UTMSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
32
- endpoint: str
33
- id: str
34
- day_subdir_schema: str
35
- stac_json_endswith: str
36
- description: str
37
- stac_extensions: List[str]
38
- blacklist: Set[str] = (
39
- blacklist_products(mapchete_eo_settings.blacklist)
40
- if mapchete_eo_settings.blacklist
41
- else set()
42
- )
30
+ class UTMSearchCatalog(StaticCollectionWriterMixin, CollectionSearcher):
43
31
  config_cls = UTMSearchConfig
44
32
 
45
- def __init__(
46
- self,
47
- endpoint: Optional[MPathLike] = None,
48
- collections: List[str] = [],
49
- stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None,
50
- ):
51
- self.endpoint = endpoint or self.endpoint
52
- if len(collections) == 0: # pragma: no cover
53
- raise ValueError("no collections provided")
54
- self.collections = collections
55
- self.stac_item_modifiers = stac_item_modifiers
33
+ @cached_property
34
+ def endpoint(self) -> Optional[str]:
35
+ for collection_properties in self.config.sinergise_aws_collections.values():
36
+ if collection_properties["id"] == self.collection.split("/")[-1].replace(
37
+ ".json", ""
38
+ ):
39
+ return collection_properties.get("endpoint")
40
+ return None
41
+
42
+ day_subdir_schema: str = "{year}/{month:02d}/{day:02d}"
43
+ stac_json_endswith: str = "T{tile_id}.json"
44
+
45
+ @cached_property
46
+ def client(self) -> CollectionClient:
47
+ return next(self.get_collections())
56
48
 
57
49
  @cached_property
58
50
  def eo_bands(self) -> List[str]: # pragma: no cover
59
- return self._eo_bands()
51
+ for (
52
+ collection_properties
53
+ ) in UTMSearchConfig().sinergise_aws_collections.values():
54
+ if collection_properties["id"] == self.collection.split("/")[-1]:
55
+ collection = Collection.from_dict(
56
+ collection_properties["path"].read_json()
57
+ )
58
+ if collection:
59
+ summary = collection.summaries.to_dict()
60
+ if "eo:bands" in summary:
61
+ return summary["eo:bands"]
62
+ else:
63
+ raise ValueError(f"cannot find collection {collection}")
64
+ else:
65
+ logger.debug(
66
+ "cannot find eo:bands definition from collection %s",
67
+ self.collection,
68
+ )
69
+ return []
60
70
 
61
71
  def search(
62
72
  self,
63
73
  time: Optional[Union[TimeRange, List[TimeRange]]] = None,
64
74
  bounds: Optional[BoundsLike] = None,
65
75
  area: Optional[BaseGeometry] = None,
76
+ query: Optional[str] = None,
66
77
  search_kwargs: Optional[Dict[str, Any]] = None,
67
78
  ) -> Generator[Item, None, None]:
68
- config = self.config_cls(**search_kwargs or {})
69
- if bounds:
70
- bounds = Bounds.from_inp(bounds)
71
-
72
79
  for item in filter_items(
73
- self._raw_search(time=time, bounds=bounds, area=area),
74
- max_cloud_cover=config.max_cloud_cover,
80
+ self._raw_search(
81
+ time=time, bounds=Bounds.from_inp(bounds) if bounds else None, area=area
82
+ ),
83
+ query=query,
75
84
  ):
76
85
  yield item
77
86
 
@@ -80,8 +89,9 @@ class UTMSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
80
89
  time: Optional[Union[TimeRange, List[TimeRange]]] = None,
81
90
  bounds: Optional[Bounds] = None,
82
91
  area: Optional[BaseGeometry] = None,
83
- config: UTMSearchConfig = UTMSearchConfig(),
92
+ config: Optional[UTMSearchConfig] = None,
84
93
  ) -> Generator[Item, None, None]:
94
+ config = config or UTMSearchConfig()
85
95
  if time is None:
86
96
  raise ValueError("time must be given")
87
97
  if area is not None and area.is_empty:
@@ -92,7 +102,12 @@ class UTMSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
92
102
  elif bounds is not None:
93
103
  bounds = Bounds.from_inp(bounds)
94
104
  area = shape(bounds)
95
- for time_range in time if isinstance(time, list) else [time]:
105
+
106
+ # Cleaner time list in case None present as time (undefined)
107
+ time_list: list[TimeRange] = (
108
+ [t for t in time if t is not None] if isinstance(time, list) else [time]
109
+ )
110
+ for time_range in time_list:
96
111
  start_time = (
97
112
  time_range.start
98
113
  if isinstance(time_range.start, datetime.date)
@@ -151,28 +166,6 @@ class UTMSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
151
166
  elif area.intersects(shape(item.geometry)):
152
167
  yield item
153
168
 
154
- def _eo_bands(self) -> list:
155
- for collection_name in self.collections:
156
- for (
157
- collection_properties
158
- ) in UTMSearchConfig().sinergise_aws_collections.values():
159
- if collection_properties["id"] == collection_name:
160
- collection = Collection.from_dict(
161
- collection_properties["path"].read_json()
162
- )
163
- if collection:
164
- summary = collection.summaries.to_dict()
165
- if "eo:bands" in summary:
166
- return summary["eo:bands"]
167
- else:
168
- raise ValueError(f"cannot find collection {collection}")
169
- else:
170
- logger.debug(
171
- "cannot find eo:bands definition from collections %s",
172
- self.collections,
173
- )
174
- return []
175
-
176
169
  def get_collections(self):
177
170
  """
178
171
  yeild transformed collection from:
@@ -182,9 +175,8 @@ class UTMSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
182
175
  """
183
176
  for collection_properties in self.config.sinergise_aws_collections.values():
184
177
  collection = Collection.from_dict(collection_properties["path"].read_json())
185
- for collection_name in self.collections:
186
- if collection_name == collection.id:
187
- yield collection
178
+ if self.collection.split("/")[-1] == collection.id:
179
+ yield collection
188
180
 
189
181
 
190
182
  def items_from_static_index(