mapchete-eo 2025.10.1__py2.py3-none-any.whl → 2025.11.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. mapchete_eo/__init__.py +1 -1
  2. mapchete_eo/base.py +94 -54
  3. mapchete_eo/cli/options_arguments.py +11 -27
  4. mapchete_eo/cli/s2_brdf.py +1 -1
  5. mapchete_eo/cli/s2_cat_results.py +4 -20
  6. mapchete_eo/cli/s2_find_broken_products.py +4 -20
  7. mapchete_eo/cli/s2_jp2_static_catalog.py +2 -2
  8. mapchete_eo/cli/static_catalog.py +4 -45
  9. mapchete_eo/eostac.py +1 -1
  10. mapchete_eo/io/assets.py +7 -7
  11. mapchete_eo/io/items.py +36 -23
  12. mapchete_eo/io/path.py +19 -8
  13. mapchete_eo/io/products.py +22 -24
  14. mapchete_eo/platforms/sentinel2/__init__.py +1 -1
  15. mapchete_eo/platforms/sentinel2/_mapper_registry.py +89 -0
  16. mapchete_eo/platforms/sentinel2/brdf/correction.py +1 -1
  17. mapchete_eo/platforms/sentinel2/brdf/hls.py +1 -1
  18. mapchete_eo/platforms/sentinel2/brdf/models.py +1 -1
  19. mapchete_eo/platforms/sentinel2/brdf/protocols.py +1 -1
  20. mapchete_eo/platforms/sentinel2/brdf/ross_thick.py +1 -1
  21. mapchete_eo/platforms/sentinel2/brdf/sun_angle_arrays.py +1 -1
  22. mapchete_eo/platforms/sentinel2/config.py +73 -13
  23. mapchete_eo/platforms/sentinel2/driver.py +0 -39
  24. mapchete_eo/platforms/sentinel2/metadata_parser/__init__.py +6 -0
  25. mapchete_eo/platforms/sentinel2/{path_mappers → metadata_parser}/base.py +1 -1
  26. mapchete_eo/platforms/sentinel2/{path_mappers/metadata_xml.py → metadata_parser/default_path_mapper.py} +2 -2
  27. mapchete_eo/platforms/sentinel2/metadata_parser/models.py +78 -0
  28. mapchete_eo/platforms/sentinel2/{metadata_parser.py → metadata_parser/s2metadata.py} +51 -144
  29. mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py +57 -0
  30. mapchete_eo/platforms/sentinel2/preconfigured_sources/guessers.py +108 -0
  31. mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py +171 -0
  32. mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py +217 -0
  33. mapchete_eo/platforms/sentinel2/preprocessing_tasks.py +22 -1
  34. mapchete_eo/platforms/sentinel2/processing_baseline.py +3 -0
  35. mapchete_eo/platforms/sentinel2/product.py +83 -18
  36. mapchete_eo/platforms/sentinel2/source.py +114 -0
  37. mapchete_eo/platforms/sentinel2/types.py +5 -0
  38. mapchete_eo/product.py +14 -8
  39. mapchete_eo/protocols.py +5 -0
  40. mapchete_eo/search/__init__.py +3 -3
  41. mapchete_eo/search/base.py +105 -92
  42. mapchete_eo/search/config.py +25 -4
  43. mapchete_eo/search/s2_mgrs.py +8 -9
  44. mapchete_eo/search/stac_search.py +96 -77
  45. mapchete_eo/search/stac_static.py +47 -91
  46. mapchete_eo/search/utm_search.py +36 -49
  47. mapchete_eo/settings.py +1 -0
  48. mapchete_eo/sort.py +4 -6
  49. mapchete_eo/source.py +107 -0
  50. {mapchete_eo-2025.10.1.dist-info → mapchete_eo-2025.11.0.dist-info}/METADATA +2 -1
  51. mapchete_eo-2025.11.0.dist-info/RECORD +89 -0
  52. {mapchete_eo-2025.10.1.dist-info → mapchete_eo-2025.11.0.dist-info}/entry_points.txt +1 -1
  53. mapchete_eo/archives/__init__.py +0 -0
  54. mapchete_eo/archives/base.py +0 -65
  55. mapchete_eo/geometry.py +0 -271
  56. mapchete_eo/known_catalogs.py +0 -42
  57. mapchete_eo/platforms/sentinel2/archives.py +0 -190
  58. mapchete_eo/platforms/sentinel2/path_mappers/__init__.py +0 -29
  59. mapchete_eo/platforms/sentinel2/path_mappers/earthsearch.py +0 -34
  60. mapchete_eo/platforms/sentinel2/path_mappers/sinergise.py +0 -105
  61. mapchete_eo-2025.10.1.dist-info/RECORD +0 -88
  62. {mapchete_eo-2025.10.1.dist-info → mapchete_eo-2025.11.0.dist-info}/WHEEL +0 -0
  63. {mapchete_eo-2025.10.1.dist-info → mapchete_eo-2025.11.0.dist-info}/licenses/LICENSE +0 -0
mapchete_eo/protocols.py CHANGED
@@ -15,6 +15,7 @@ from mapchete.io.raster import ReferencedRaster
15
15
 
16
16
 
17
17
  class EOProductProtocol(Protocol):
18
+ id: str
18
19
  bounds: Bounds
19
20
  crs: CRS
20
21
  __geo_interface__: Optional[Dict[str, Any]]
@@ -54,3 +55,7 @@ class EOProductProtocol(Protocol):
54
55
 
55
56
  class DateTimeProtocol(Protocol):
56
57
  datetime: DateTimeLike
58
+
59
+
60
+ class GetPropertyProtocol(Protocol):
61
+ def get_property(self, property: str) -> Any: ...
mapchete_eo/search/__init__.py CHANGED
@@ -7,8 +7,8 @@ of product metadata.
7
7
  It helps the InputData class to find the input products and their metadata.
8
8
  """
9
9
 
10
- from mapchete_eo.search.stac_search import STACSearchCatalog
11
- from mapchete_eo.search.stac_static import STACStaticCatalog
10
+ from mapchete_eo.search.stac_search import STACSearchCollection
11
+ from mapchete_eo.search.stac_static import STACStaticCollection
12
12
  from mapchete_eo.search.utm_search import UTMSearchCatalog
13
13
 
14
- __all__ = ["STACSearchCatalog", "STACStaticCatalog", "UTMSearchCatalog"]
14
+ __all__ = ["STACSearchCollection", "STACStaticCollection", "UTMSearchCatalog"]
mapchete_eo/search/base.py CHANGED
@@ -4,13 +4,14 @@ import logging
4
4
  from abc import ABC, abstractmethod
5
5
  from typing import Any, Callable, Dict, Generator, List, Optional, Type, Union
6
6
 
7
+ from cql2 import Expr
7
8
  from pydantic import BaseModel
8
- from pystac import Item, Catalog, CatalogType, Extent
9
9
  from mapchete.path import MPath, MPathLike
10
10
  from mapchete.types import Bounds
11
+ from pystac import Catalog, Item, CatalogType, Extent
11
12
  from pystac.collection import Collection
12
13
  from pystac.stac_io import DefaultStacIO
13
- from pystac_client import Client
14
+ from pystac_client import CollectionClient
14
15
  from pystac_client.stac_api_io import StacApiIO
15
16
  from rasterio.profiles import Profile
16
17
  from shapely.geometry.base import BaseGeometry
@@ -44,13 +45,26 @@ class FSSpecStacIO(StacApiIO):
44
45
  return dst.write(json.dumps(json_dict, indent=2))
45
46
 
46
47
 
47
- class CatalogSearcher(ABC):
48
+ class CollectionSearcher(ABC):
48
49
  """
49
50
  This class serves as a bridge between an Archive and a catalog implementation.
50
51
  """
51
52
 
52
- collections: List[str]
53
53
  config_cls: Type[BaseModel]
54
+ collection: str
55
+ stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None
56
+
57
+ def __init__(
58
+ self,
59
+ collection: str,
60
+ stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None,
61
+ ):
62
+ self.collection = collection
63
+ self.stac_item_modifiers = stac_item_modifiers
64
+
65
+ @abstractmethod
66
+ @cached_property
67
+ def client(self) -> CollectionClient: ...
54
68
 
55
69
  @abstractmethod
56
70
  @cached_property
@@ -74,20 +88,17 @@ class CatalogSearcher(ABC):
74
88
  time: Optional[Union[TimeRange, List[TimeRange]]] = None,
75
89
  bounds: Optional[Bounds] = None,
76
90
  area: Optional[BaseGeometry] = None,
91
+ query: Optional[str] = None,
77
92
  search_kwargs: Optional[Dict[str, Any]] = None,
78
93
  ) -> Generator[Item, None, None]: ...
79
94
 
80
95
 
81
- class StaticCatalogWriterMixin(CatalogSearcher):
96
+ class StaticCollectionWriterMixin(CollectionSearcher):
82
97
  # client: Client
83
98
  # id: str
84
99
  # description: str
85
100
  # stac_extensions: List[str]
86
101
 
87
- @abstractmethod
88
- def get_collections(self) -> List[Collection]: # pragma: no cover
89
- ...
90
-
91
102
  def write_static_catalog(
92
103
  self,
93
104
  output_path: MPathLike,
@@ -113,94 +124,93 @@ class StaticCatalogWriterMixin(CatalogSearcher):
113
124
  catalog_json = output_path / "catalog.json"
114
125
  if catalog_json.exists():
115
126
  logger.debug("open existing catalog %s", str(catalog_json))
116
- client = Client.from_file(catalog_json)
117
- # catalog = pystac.Catalog.from_file(catalog_json)
118
- existing_collections = list(client.get_collections())
127
+ catalog = Catalog.from_file(catalog_json)
128
+ # client = Client.from_file(catalog_json)
129
+ # existing_collection = client.get_collection(self.id)
119
130
  else:
120
- existing_collections = []
121
- catalog = Catalog(
122
- name or f"{self.id}",
123
- description or f"Static subset of {self.description}",
124
- stac_extensions=self.stac_extensions,
125
- href=str(catalog_json),
126
- catalog_type=CatalogType.SELF_CONTAINED,
127
- )
131
+ # existing_collections = []
132
+ catalog = Catalog(
133
+ name or f"{self.id}",
134
+ description or f"Static subset of {self.description}",
135
+ stac_extensions=self.stac_extensions,
136
+ href=str(catalog_json),
137
+ catalog_type=CatalogType.SELF_CONTAINED,
138
+ )
128
139
  src_items = list(
129
140
  self.search(
130
141
  time=time, bounds=bounds, area=area, search_kwargs=search_kwargs
131
142
  )
132
143
  )
133
- for collection in self.get_collections():
134
- # collect all items and download assets if required
135
- items: List[Item] = []
136
- item_ids = set()
137
- for n, item in enumerate(src_items, 1):
138
- logger.debug("found item %s", item)
139
- item = item.clone()
140
- if assets:
141
- logger.debug("get assets %s", assets)
142
- item = get_assets(
143
- item,
144
- assets,
145
- output_path / collection.id / item.id,
146
- resolution=assets_dst_resolution,
147
- convert_profile=assets_convert_profile,
148
- overwrite=overwrite,
149
- ignore_if_exists=True,
150
- )
151
- if copy_metadata:
152
- item = get_metadata_assets(
153
- item,
154
- output_path / collection.id / item.id,
155
- metadata_parser_classes=metadata_parser_classes,
156
- resolution=assets_dst_resolution,
157
- convert_profile=assets_convert_profile,
158
- overwrite=overwrite,
159
- )
160
- # this has to be set to None, otherwise pystac will mess up the asset paths
161
- # after normalizing
162
- item.set_self_href(None)
163
-
164
- items.append(item)
165
- item_ids.add(item.id)
166
-
167
- if progress_callback:
168
- progress_callback(n=n, total=len(src_items))
169
-
170
- for existing_collection in existing_collections:
171
- if existing_collection.id == collection.id:
172
- logger.debug("try to find unregistered items in collection")
173
- collection_root_path = MPath.from_inp(
174
- existing_collection.get_self_href()
175
- ).parent
176
- for subpath in collection_root_path.ls():
177
- if subpath.is_directory():
178
- try:
179
- item = Item.from_file(
180
- subpath / subpath.with_suffix(".json").name
181
- )
182
- if item.id not in item_ids:
183
- logger.debug(
184
- "add existing item with id %s", item.id
185
- )
186
- items.append(item)
187
- item_ids.add(item.id)
188
- except FileNotFoundError:
189
- pass
190
- break
144
+ # collect all items and download assets if required
145
+ items: List[Item] = []
146
+ item_ids = set()
147
+ for n, item in enumerate(src_items, 1):
148
+ logger.debug("found item %s", item)
149
+ item = item.clone()
150
+ if assets:
151
+ logger.debug("get assets %s", assets)
152
+ item = get_assets(
153
+ item,
154
+ assets,
155
+ output_path / self.id / item.id,
156
+ resolution=assets_dst_resolution,
157
+ convert_profile=assets_convert_profile,
158
+ overwrite=overwrite,
159
+ ignore_if_exists=True,
160
+ )
161
+ if copy_metadata:
162
+ item = get_metadata_assets(
163
+ item,
164
+ output_path / self.id / item.id,
165
+ metadata_parser_classes=metadata_parser_classes,
166
+ resolution=assets_dst_resolution,
167
+ convert_profile=assets_convert_profile,
168
+ overwrite=overwrite,
169
+ )
170
+ # this has to be set to None, otherwise pystac will mess up the asset paths
171
+ # after normalizing
172
+ item.set_self_href(None)
173
+
174
+ items.append(item)
175
+ item_ids.add(item.id)
176
+
177
+ if progress_callback:
178
+ progress_callback(n=n, total=len(src_items))
179
+
180
+ # for existing_collection in existing_collections:
181
+ # if existing_collection.id == collection.id:
182
+ # logger.debug("try to find unregistered items in collection")
183
+ # collection_root_path = MPath.from_inp(
184
+ # existing_collection.get_self_href()
185
+ # ).parent
186
+ # for subpath in collection_root_path.ls():
187
+ # if subpath.is_directory():
188
+ # try:
189
+ # item = Item.from_file(
190
+ # subpath / subpath.with_suffix(".json").name
191
+ # )
192
+ # if item.id not in item_ids:
193
+ # logger.debug(
194
+ # "add existing item with id %s", item.id
195
+ # )
196
+ # items.append(item)
197
+ # item_ids.add(item.id)
198
+ # except FileNotFoundError:
199
+ # pass
200
+ # break
191
201
  # create collection and copy metadata
192
202
  logger.debug("create new collection")
193
203
  out_collection = Collection(
194
- id=collection.id,
204
+ id=self.id,
195
205
  extent=Extent.from_items(items),
196
- description=collection.description,
197
- title=collection.title,
198
- stac_extensions=collection.stac_extensions,
199
- license=collection.license,
200
- keywords=collection.keywords,
201
- providers=collection.providers,
202
- summaries=collection.summaries,
203
- extra_fields=collection.extra_fields,
206
+ description=self.description,
207
+ title=self.client.title,
208
+ stac_extensions=self.stac_extensions,
209
+ license=self.client.license,
210
+ keywords=self.client.keywords,
211
+ providers=self.client.providers,
212
+ summaries=self.client.summaries,
213
+ extra_fields=self.client.extra_fields,
204
214
  catalog_type=CatalogType.SELF_CONTAINED,
205
215
  )
206
216
 
@@ -222,14 +232,17 @@ class StaticCatalogWriterMixin(CatalogSearcher):
222
232
 
223
233
  def filter_items(
224
234
  items: Generator[Item, None, None],
225
- cloud_cover_field: str = "eo:cloud_cover",
226
- max_cloud_cover: float = 100.0,
235
+ query: Optional[str] = None,
227
236
  ) -> Generator[Item, None, None]:
228
237
  """
229
238
  Only for cloudcover now, this can and should be adapted for filter field and value
230
239
  the field and value for the item filter would be defined in search.config.py corresponding configs
231
240
  and passed down to the individual search approaches via said config and this Function.
232
241
  """
233
- for item in items:
234
- if item.properties.get(cloud_cover_field, 0.0) <= max_cloud_cover:
235
- yield item
242
+ if query:
243
+ expr = Expr(query)
244
+ for item in items:
245
+ if expr.matches(item.properties):
246
+ yield item
247
+ else:
248
+ yield from items
mapchete_eo/search/config.py CHANGED
@@ -1,23 +1,44 @@
1
- from typing import Optional
1
+ from typing import Optional, Dict, Any
2
2
 
3
3
  from mapchete.path import MPath, MPathLike
4
- from pydantic import BaseModel
4
+ from pydantic import BaseModel, model_validator
5
5
 
6
6
 
7
7
  class StacSearchConfig(BaseModel):
8
8
  max_cloud_cover: float = 100.0
9
+ query: Optional[str] = None
9
10
  catalog_chunk_threshold: int = 10_000
10
11
  catalog_chunk_zoom: int = 5
11
12
  catalog_pagesize: int = 100
12
13
  footprint_buffer: float = 0
13
14
 
15
+ @model_validator(mode="before")
16
+ def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]:
17
+ if "max_cloud_cover" in values: # pragma: no cover
18
+ raise DeprecationWarning(
19
+ "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.",
20
+ )
21
+ return values
22
+
14
23
 
15
24
  class StacStaticConfig(BaseModel):
16
- max_cloud_cover: float = 100.0
25
+ @model_validator(mode="before")
26
+ def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]:
27
+ if "max_cloud_cover" in values: # pragma: no cover
28
+ raise DeprecationWarning(
29
+ "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.",
30
+ )
31
+ return values
17
32
 
18
33
 
19
34
  class UTMSearchConfig(BaseModel):
20
- max_cloud_cover: float = 100.0
35
+ @model_validator(mode="before")
36
+ def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]:
37
+ if "max_cloud_cover" in values: # pragma: no cover
38
+ raise DeprecationWarning(
39
+ "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.",
40
+ )
41
+ return values
21
42
 
22
43
  sinergise_aws_collections: dict = dict(
23
44
  S2_L2A=dict(
mapchete_eo/search/s2_mgrs.py CHANGED
@@ -6,18 +6,17 @@ from functools import cached_property
6
6
  from itertools import product
7
7
  from typing import List, Literal, Optional, Tuple, Union
8
8
 
9
- from mapchete.geometry import reproject_geometry
9
+ from mapchete.geometry import (
10
+ reproject_geometry,
11
+ repair_antimeridian_geometry,
12
+ transform_to_latlon,
13
+ )
10
14
  from mapchete.types import Bounds
11
15
  from rasterio.crs import CRS
12
16
  from shapely import prepare
13
17
  from shapely.geometry import box, mapping, shape
14
18
  from shapely.geometry.base import BaseGeometry
15
19
 
16
- from mapchete_eo.geometry import (
17
- bounds_to_geom,
18
- repair_antimeridian_geometry,
19
- transform_to_latlon,
20
- )
21
20
 
22
21
  LATLON_LEFT = -180
23
22
  LATLON_RIGHT = 180
@@ -255,7 +254,7 @@ class S2Tile:
255
254
  grid_square = tile_id[3:]
256
255
  try:
257
256
  int(utm_zone)
258
- except Exception:
257
+ except Exception: # pragma: no cover
259
258
  raise ValueError(f"invalid UTM zone given: {utm_zone}")
260
259
 
261
260
  return MGRSCell(utm_zone, latitude_band).tile(grid_square)
@@ -268,7 +267,7 @@ class S2Tile:
268
267
  def s2_tiles_from_bounds(
269
268
  left: float, bottom: float, right: float, top: float
270
269
  ) -> List[S2Tile]:
271
- bounds = Bounds(left, bottom, right, top)
270
+ bounds = Bounds(left, bottom, right, top, crs="EPSG:4326")
272
271
 
273
272
  # determine zones in eastern-western direction
274
273
  min_zone_idx = math.floor((left + LATLON_WIDTH_OFFSET) / UTM_ZONE_WIDTH)
@@ -291,7 +290,7 @@ def s2_tiles_from_bounds(
291
290
  min_latitude_band_idx -= 1
292
291
  max_latitude_band_idx += 1
293
292
 
294
- aoi = bounds_to_geom(bounds)
293
+ aoi = bounds.latlon_geometry()
295
294
  prepare(aoi)
296
295
 
297
296
  def tiles_generator():
mapchete_eo/search/stac_search.py CHANGED
@@ -1,19 +1,20 @@
1
+ from __future__ import annotations
2
+
1
3
  import logging
2
4
  from datetime import datetime
3
5
  from functools import cached_property
4
- from typing import Any, Callable, Dict, Generator, Iterator, List, Optional, Set, Union
6
+ from typing import Any, Dict, Generator, Iterator, List, Optional, Set, Union
5
7
 
6
8
  from mapchete import Timer
7
- from mapchete.path import MPathLike
8
9
  from mapchete.tile import BufferedTilePyramid
9
10
  from mapchete.types import Bounds, BoundsLike
10
11
  from pystac import Item
11
- from pystac_client import Client
12
- from shapely.geometry import shape
12
+ from pystac_client import Client, CollectionClient, ItemSearch
13
+ from shapely.geometry import shape, box
13
14
  from shapely.geometry.base import BaseGeometry
14
15
 
15
16
  from mapchete_eo.product import blacklist_products
16
- from mapchete_eo.search.base import CatalogSearcher, StaticCatalogWriterMixin
17
+ from mapchete_eo.search.base import CollectionSearcher, StaticCollectionWriterMixin
17
18
  from mapchete_eo.search.config import StacSearchConfig
18
19
  from mapchete_eo.settings import mapchete_eo_settings
19
20
  from mapchete_eo.types import TimeRange
@@ -21,8 +22,8 @@ from mapchete_eo.types import TimeRange
21
22
  logger = logging.getLogger(__name__)
22
23
 
23
24
 
24
- class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
25
- endpoint: str
25
+ class STACSearchCollection(StaticCollectionWriterMixin, CollectionSearcher):
26
+ collection: str
26
27
  blacklist: Set[str] = (
27
28
  blacklist_products(mapchete_eo_settings.blacklist)
28
29
  if mapchete_eo_settings.blacklist
@@ -30,27 +31,19 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
30
31
  )
31
32
  config_cls = StacSearchConfig
32
33
 
33
- def __init__(
34
- self,
35
- collections: Optional[List[str]] = None,
36
- stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None,
37
- endpoint: Optional[MPathLike] = None,
38
- ):
39
- if endpoint is not None:
40
- self.endpoint = endpoint
41
- if collections:
42
- self.collections = collections
43
- else: # pragma: no cover
44
- raise ValueError("collections must be given")
45
- self.stac_item_modifiers = stac_item_modifiers
46
-
47
34
  @cached_property
48
- def client(self) -> Client:
49
- return Client.open(self.endpoint)
35
+ def client(self) -> CollectionClient:
36
+ return CollectionClient.from_file(self.collection)
50
37
 
51
38
  @cached_property
52
39
  def eo_bands(self) -> List[str]:
53
- return self._eo_bands()
40
+ item_assets = self.client.extra_fields.get("item_assets", {})
41
+ for v in item_assets.values():
42
+ if "eo:bands" in v and "data" in v.get("roles", []):
43
+ return ["eo:bands"]
44
+ else: # pragma: no cover
45
+ logger.debug("cannot find eo:bands definition from collections")
46
+ return []
54
47
 
55
48
  @cached_property
56
49
  def id(self) -> str:
@@ -69,27 +62,35 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
69
62
  time: Optional[Union[TimeRange, List[TimeRange]]] = None,
70
63
  bounds: Optional[BoundsLike] = None,
71
64
  area: Optional[BaseGeometry] = None,
65
+ query: Optional[str] = None,
72
66
  search_kwargs: Optional[Dict[str, Any]] = None,
73
67
  ) -> Generator[Item, None, None]:
74
68
  config = self.config_cls(**search_kwargs or {})
75
69
  if bounds:
76
70
  bounds = Bounds.from_inp(bounds)
77
- if time is None: # pragma: no cover
78
- raise ValueError("time must be set")
79
71
  if area is None and bounds is None: # pragma: no cover
80
72
  raise ValueError("either bounds or area have to be given")
81
73
 
82
74
  if area is not None and area.is_empty: # pragma: no cover
83
75
  return
84
76
 
85
- def _searches():
86
- for time_range in time if isinstance(time, list) else [time]:
77
+ def _searches() -> Generator[ItemSearch, None, None]:
78
+ def _search_chunks(
79
+ time_range: Optional[TimeRange] = None,
80
+ bounds: Optional[BoundsLike] = None,
81
+ area: Optional[BaseGeometry] = None,
82
+ query: Optional[str] = None,
83
+ ):
87
84
  search = self._search(
88
- time_range=time_range, bounds=bounds, area=area, config=config
85
+ time_range=time_range,
86
+ bounds=bounds,
87
+ area=box(*area.bounds) if area else None,
88
+ query=query,
89
+ config=config,
89
90
  )
90
91
  logger.debug("found %s products", search.matched())
91
92
  matched = search.matched() or 0
92
- if matched > config.catalog_chunk_threshold:
93
+ if matched > config.catalog_chunk_threshold: # pragma: no cover
93
94
  spatial_search_chunks = SpatialSearchChunks(
94
95
  bounds=bounds,
95
96
  area=area,
@@ -105,6 +106,7 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
105
106
  with Timer() as duration:
106
107
  chunk_search = self._search(
107
108
  time_range=time_range,
109
+ query=query,
108
110
  config=config,
109
111
  **chunk_kwargs,
110
112
  )
@@ -119,48 +121,60 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
119
121
  else:
120
122
  yield search
121
123
 
122
- for search in _searches():
123
- for count, item in enumerate(search.items(), 1):
124
- item_path = item.get_self_href()
125
- # logger.debug("item %s/%s ...", count, search.matched())
126
- if item_path in self.blacklist: # pragma: no cover
127
- logger.debug("item %s found in blacklist and skipping", item_path)
128
- else:
129
- yield item
124
+ if time:
125
+ # search time range(s)
126
+ for time_range in time if isinstance(time, list) else [time]:
127
+ yield from _search_chunks(
128
+ time_range=time_range,
129
+ bounds=bounds,
130
+ area=area,
131
+ query=query,
132
+ )
133
+ else:
134
+ # don't apply temporal filter
135
+ yield from _search_chunks(
136
+ bounds=bounds,
137
+ area=area,
138
+ query=query,
139
+ )
130
140
 
131
- def _eo_bands(self) -> List[str]:
132
- for collection_name in self.collections:
133
- collection = self.client.get_collection(collection_name)
134
- if collection:
135
- item_assets = collection.extra_fields.get("item_assets", {})
136
- for v in item_assets.values():
137
- if "eo:bands" in v and "data" in v.get("roles", []):
138
- return ["eo:bands"]
139
- else: # pragma: no cover
140
- raise ValueError(f"cannot find collection {collection}")
141
- else: # pragma: no cover
142
- logger.debug("cannot find eo:bands definition from collections")
143
- return []
141
+ for search in _searches():
142
+ for item in search.items():
143
+ if item.get_self_href() in self.blacklist: # pragma: no cover
144
+ logger.debug(
145
+ "item %s found in blacklist and skipping", item.get_self_href()
146
+ )
147
+ continue
148
+ yield item
144
149
 
145
150
  @cached_property
146
151
  def default_search_params(self):
147
152
  return {
148
- "collections": self.collections,
153
+ "collections": [self.client],
149
154
  "bbox": None,
150
155
  "intersects": None,
151
156
  }
152
157
 
158
+ @cached_property
159
+ def search_client(self) -> Client:
160
+ # looks weird, right?
161
+ #
162
+ # one would assume that directly returning self.client.get_root() would
163
+ # do the same but if we do so, it seems to ignore the "collections" parameter
164
+ # and thus query all collection available on that search endpoint.
165
+ #
166
+ # the only way to fix this, is to instantiate Client from scratch.
167
+ return Client.from_file(self.client.get_root().self_href)
168
+
153
169
  def _search(
154
170
  self,
155
171
  time_range: Optional[TimeRange] = None,
156
172
  bounds: Optional[Bounds] = None,
157
173
  area: Optional[BaseGeometry] = None,
174
+ query: Optional[str] = None,
158
175
  config: StacSearchConfig = StacSearchConfig(),
159
176
  **kwargs,
160
- ):
161
- if time_range is None: # pragma: no cover
162
- raise ValueError("time_range not provided")
163
-
177
+ ) -> ItemSearch:
164
178
  if bounds is not None:
165
179
  if shape(bounds).is_empty: # pragma: no cover
166
180
  raise ValueError("bounds empty")
@@ -170,22 +184,29 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
170
184
  raise ValueError("area empty")
171
185
  kwargs.update(intersects=area)
172
186
 
173
- start = (
174
- time_range.start.date()
175
- if isinstance(time_range.start, datetime)
176
- else time_range.start
177
- )
178
- end = (
179
- time_range.end.date()
180
- if isinstance(time_range.end, datetime)
181
- else time_range.end
182
- )
183
- search_params = dict(
184
- self.default_search_params,
185
- datetime=f"{start}/{end}",
186
- query=[f"eo:cloud_cover<={config.max_cloud_cover}"],
187
- **kwargs,
188
- )
187
+ if time_range:
188
+ start = (
189
+ time_range.start.date()
190
+ if isinstance(time_range.start, datetime)
191
+ else time_range.start
192
+ )
193
+ end = (
194
+ time_range.end.date()
195
+ if isinstance(time_range.end, datetime)
196
+ else time_range.end
197
+ )
198
+ search_params = dict(
199
+ self.default_search_params,
200
+ datetime=f"{start}/{end}",
201
+ query=[query] if query else None,
202
+ **kwargs,
203
+ )
204
+ else:
205
+ search_params = dict(
206
+ self.default_search_params,
207
+ query=[query] if query else None,
208
+ **kwargs,
209
+ )
189
210
  if (
190
211
  bounds is None
191
212
  and area is None
@@ -194,14 +215,12 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
194
215
  raise ValueError("no bounds or area given")
195
216
  logger.debug("query catalog using params: %s", search_params)
196
217
  with Timer() as duration:
197
- result = self.client.search(**search_params, limit=config.catalog_pagesize)
218
+ result = self.search_client.search(
219
+ **search_params, limit=config.catalog_pagesize
220
+ )
198
221
  logger.debug("query took %s", str(duration))
199
222
  return result
200
223
 
201
- def get_collections(self):
202
- for collection_name in self.collections:
203
- yield self.client.get_collection(collection_name)
204
-
205
224
 
206
225
  class SpatialSearchChunks:
207
226
  bounds: Bounds