mapchete-eo 2025.10.1__py2.py3-none-any.whl → 2025.11.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mapchete_eo/__init__.py +1 -1
- mapchete_eo/base.py +94 -54
- mapchete_eo/cli/options_arguments.py +11 -27
- mapchete_eo/cli/s2_brdf.py +1 -1
- mapchete_eo/cli/s2_cat_results.py +4 -20
- mapchete_eo/cli/s2_find_broken_products.py +4 -20
- mapchete_eo/cli/s2_jp2_static_catalog.py +2 -2
- mapchete_eo/cli/static_catalog.py +4 -45
- mapchete_eo/eostac.py +1 -1
- mapchete_eo/io/assets.py +7 -7
- mapchete_eo/io/items.py +36 -23
- mapchete_eo/io/path.py +19 -8
- mapchete_eo/io/products.py +22 -24
- mapchete_eo/platforms/sentinel2/__init__.py +1 -1
- mapchete_eo/platforms/sentinel2/_mapper_registry.py +89 -0
- mapchete_eo/platforms/sentinel2/brdf/correction.py +1 -1
- mapchete_eo/platforms/sentinel2/brdf/hls.py +1 -1
- mapchete_eo/platforms/sentinel2/brdf/models.py +1 -1
- mapchete_eo/platforms/sentinel2/brdf/protocols.py +1 -1
- mapchete_eo/platforms/sentinel2/brdf/ross_thick.py +1 -1
- mapchete_eo/platforms/sentinel2/brdf/sun_angle_arrays.py +1 -1
- mapchete_eo/platforms/sentinel2/config.py +73 -13
- mapchete_eo/platforms/sentinel2/driver.py +0 -39
- mapchete_eo/platforms/sentinel2/metadata_parser/__init__.py +6 -0
- mapchete_eo/platforms/sentinel2/{path_mappers → metadata_parser}/base.py +1 -1
- mapchete_eo/platforms/sentinel2/{path_mappers/metadata_xml.py → metadata_parser/default_path_mapper.py} +2 -2
- mapchete_eo/platforms/sentinel2/metadata_parser/models.py +78 -0
- mapchete_eo/platforms/sentinel2/{metadata_parser.py → metadata_parser/s2metadata.py} +51 -144
- mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py +57 -0
- mapchete_eo/platforms/sentinel2/preconfigured_sources/guessers.py +108 -0
- mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py +171 -0
- mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py +217 -0
- mapchete_eo/platforms/sentinel2/preprocessing_tasks.py +22 -1
- mapchete_eo/platforms/sentinel2/processing_baseline.py +3 -0
- mapchete_eo/platforms/sentinel2/product.py +83 -18
- mapchete_eo/platforms/sentinel2/source.py +114 -0
- mapchete_eo/platforms/sentinel2/types.py +5 -0
- mapchete_eo/product.py +14 -8
- mapchete_eo/protocols.py +5 -0
- mapchete_eo/search/__init__.py +3 -3
- mapchete_eo/search/base.py +105 -92
- mapchete_eo/search/config.py +25 -4
- mapchete_eo/search/s2_mgrs.py +8 -9
- mapchete_eo/search/stac_search.py +96 -77
- mapchete_eo/search/stac_static.py +47 -91
- mapchete_eo/search/utm_search.py +36 -49
- mapchete_eo/settings.py +1 -0
- mapchete_eo/sort.py +4 -6
- mapchete_eo/source.py +107 -0
- {mapchete_eo-2025.10.1.dist-info → mapchete_eo-2025.11.0.dist-info}/METADATA +2 -1
- mapchete_eo-2025.11.0.dist-info/RECORD +89 -0
- {mapchete_eo-2025.10.1.dist-info → mapchete_eo-2025.11.0.dist-info}/entry_points.txt +1 -1
- mapchete_eo/archives/__init__.py +0 -0
- mapchete_eo/archives/base.py +0 -65
- mapchete_eo/geometry.py +0 -271
- mapchete_eo/known_catalogs.py +0 -42
- mapchete_eo/platforms/sentinel2/archives.py +0 -190
- mapchete_eo/platforms/sentinel2/path_mappers/__init__.py +0 -29
- mapchete_eo/platforms/sentinel2/path_mappers/earthsearch.py +0 -34
- mapchete_eo/platforms/sentinel2/path_mappers/sinergise.py +0 -105
- mapchete_eo-2025.10.1.dist-info/RECORD +0 -88
- {mapchete_eo-2025.10.1.dist-info → mapchete_eo-2025.11.0.dist-info}/WHEEL +0 -0
- {mapchete_eo-2025.10.1.dist-info → mapchete_eo-2025.11.0.dist-info}/licenses/LICENSE +0 -0
mapchete_eo/protocols.py
CHANGED
|
@@ -15,6 +15,7 @@ from mapchete.io.raster import ReferencedRaster
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class EOProductProtocol(Protocol):
|
|
18
|
+
id: str
|
|
18
19
|
bounds: Bounds
|
|
19
20
|
crs: CRS
|
|
20
21
|
__geo_interface__: Optional[Dict[str, Any]]
|
|
@@ -54,3 +55,7 @@ class EOProductProtocol(Protocol):
|
|
|
54
55
|
|
|
55
56
|
class DateTimeProtocol(Protocol):
|
|
56
57
|
datetime: DateTimeLike
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class GetPropertyProtocol(Protocol):
|
|
61
|
+
def get_property(self, property: str) -> Any: ...
|
mapchete_eo/search/__init__.py
CHANGED
|
@@ -7,8 +7,8 @@ of product metadata.
|
|
|
7
7
|
It helps the InputData class to find the input products and their metadata.
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
from mapchete_eo.search.stac_search import
|
|
11
|
-
from mapchete_eo.search.stac_static import
|
|
10
|
+
from mapchete_eo.search.stac_search import STACSearchCollection
|
|
11
|
+
from mapchete_eo.search.stac_static import STACStaticCollection
|
|
12
12
|
from mapchete_eo.search.utm_search import UTMSearchCatalog
|
|
13
13
|
|
|
14
|
-
__all__ = ["
|
|
14
|
+
__all__ = ["STACSearchCollection", "STACStaticCollection", "UTMSearchCatalog"]
|
mapchete_eo/search/base.py
CHANGED
|
@@ -4,13 +4,14 @@ import logging
|
|
|
4
4
|
from abc import ABC, abstractmethod
|
|
5
5
|
from typing import Any, Callable, Dict, Generator, List, Optional, Type, Union
|
|
6
6
|
|
|
7
|
+
from cql2 import Expr
|
|
7
8
|
from pydantic import BaseModel
|
|
8
|
-
from pystac import Item, Catalog, CatalogType, Extent
|
|
9
9
|
from mapchete.path import MPath, MPathLike
|
|
10
10
|
from mapchete.types import Bounds
|
|
11
|
+
from pystac import Catalog, Item, CatalogType, Extent
|
|
11
12
|
from pystac.collection import Collection
|
|
12
13
|
from pystac.stac_io import DefaultStacIO
|
|
13
|
-
from pystac_client import
|
|
14
|
+
from pystac_client import CollectionClient
|
|
14
15
|
from pystac_client.stac_api_io import StacApiIO
|
|
15
16
|
from rasterio.profiles import Profile
|
|
16
17
|
from shapely.geometry.base import BaseGeometry
|
|
@@ -44,13 +45,26 @@ class FSSpecStacIO(StacApiIO):
|
|
|
44
45
|
return dst.write(json.dumps(json_dict, indent=2))
|
|
45
46
|
|
|
46
47
|
|
|
47
|
-
class
|
|
48
|
+
class CollectionSearcher(ABC):
|
|
48
49
|
"""
|
|
49
50
|
This class serves as a bridge between an Archive and a catalog implementation.
|
|
50
51
|
"""
|
|
51
52
|
|
|
52
|
-
collections: List[str]
|
|
53
53
|
config_cls: Type[BaseModel]
|
|
54
|
+
collection: str
|
|
55
|
+
stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None
|
|
56
|
+
|
|
57
|
+
def __init__(
|
|
58
|
+
self,
|
|
59
|
+
collection: str,
|
|
60
|
+
stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None,
|
|
61
|
+
):
|
|
62
|
+
self.collection = collection
|
|
63
|
+
self.stac_item_modifiers = stac_item_modifiers
|
|
64
|
+
|
|
65
|
+
@abstractmethod
|
|
66
|
+
@cached_property
|
|
67
|
+
def client(self) -> CollectionClient: ...
|
|
54
68
|
|
|
55
69
|
@abstractmethod
|
|
56
70
|
@cached_property
|
|
@@ -74,20 +88,17 @@ class CatalogSearcher(ABC):
|
|
|
74
88
|
time: Optional[Union[TimeRange, List[TimeRange]]] = None,
|
|
75
89
|
bounds: Optional[Bounds] = None,
|
|
76
90
|
area: Optional[BaseGeometry] = None,
|
|
91
|
+
query: Optional[str] = None,
|
|
77
92
|
search_kwargs: Optional[Dict[str, Any]] = None,
|
|
78
93
|
) -> Generator[Item, None, None]: ...
|
|
79
94
|
|
|
80
95
|
|
|
81
|
-
class
|
|
96
|
+
class StaticCollectionWriterMixin(CollectionSearcher):
|
|
82
97
|
# client: Client
|
|
83
98
|
# id: str
|
|
84
99
|
# description: str
|
|
85
100
|
# stac_extensions: List[str]
|
|
86
101
|
|
|
87
|
-
@abstractmethod
|
|
88
|
-
def get_collections(self) -> List[Collection]: # pragma: no cover
|
|
89
|
-
...
|
|
90
|
-
|
|
91
102
|
def write_static_catalog(
|
|
92
103
|
self,
|
|
93
104
|
output_path: MPathLike,
|
|
@@ -113,94 +124,93 @@ class StaticCatalogWriterMixin(CatalogSearcher):
|
|
|
113
124
|
catalog_json = output_path / "catalog.json"
|
|
114
125
|
if catalog_json.exists():
|
|
115
126
|
logger.debug("open existing catalog %s", str(catalog_json))
|
|
116
|
-
|
|
117
|
-
#
|
|
118
|
-
|
|
127
|
+
catalog = Catalog.from_file(catalog_json)
|
|
128
|
+
# client = Client.from_file(catalog_json)
|
|
129
|
+
# existing_collection = client.get_collection(self.id)
|
|
119
130
|
else:
|
|
120
|
-
existing_collections = []
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
131
|
+
# existing_collections = []
|
|
132
|
+
catalog = Catalog(
|
|
133
|
+
name or f"{self.id}",
|
|
134
|
+
description or f"Static subset of {self.description}",
|
|
135
|
+
stac_extensions=self.stac_extensions,
|
|
136
|
+
href=str(catalog_json),
|
|
137
|
+
catalog_type=CatalogType.SELF_CONTAINED,
|
|
138
|
+
)
|
|
128
139
|
src_items = list(
|
|
129
140
|
self.search(
|
|
130
141
|
time=time, bounds=bounds, area=area, search_kwargs=search_kwargs
|
|
131
142
|
)
|
|
132
143
|
)
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
item
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
item
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
break
|
|
144
|
+
# collect all items and download assets if required
|
|
145
|
+
items: List[Item] = []
|
|
146
|
+
item_ids = set()
|
|
147
|
+
for n, item in enumerate(src_items, 1):
|
|
148
|
+
logger.debug("found item %s", item)
|
|
149
|
+
item = item.clone()
|
|
150
|
+
if assets:
|
|
151
|
+
logger.debug("get assets %s", assets)
|
|
152
|
+
item = get_assets(
|
|
153
|
+
item,
|
|
154
|
+
assets,
|
|
155
|
+
output_path / self.id / item.id,
|
|
156
|
+
resolution=assets_dst_resolution,
|
|
157
|
+
convert_profile=assets_convert_profile,
|
|
158
|
+
overwrite=overwrite,
|
|
159
|
+
ignore_if_exists=True,
|
|
160
|
+
)
|
|
161
|
+
if copy_metadata:
|
|
162
|
+
item = get_metadata_assets(
|
|
163
|
+
item,
|
|
164
|
+
output_path / self.id / item.id,
|
|
165
|
+
metadata_parser_classes=metadata_parser_classes,
|
|
166
|
+
resolution=assets_dst_resolution,
|
|
167
|
+
convert_profile=assets_convert_profile,
|
|
168
|
+
overwrite=overwrite,
|
|
169
|
+
)
|
|
170
|
+
# this has to be set to None, otherwise pystac will mess up the asset paths
|
|
171
|
+
# after normalizing
|
|
172
|
+
item.set_self_href(None)
|
|
173
|
+
|
|
174
|
+
items.append(item)
|
|
175
|
+
item_ids.add(item.id)
|
|
176
|
+
|
|
177
|
+
if progress_callback:
|
|
178
|
+
progress_callback(n=n, total=len(src_items))
|
|
179
|
+
|
|
180
|
+
# for existing_collection in existing_collections:
|
|
181
|
+
# if existing_collection.id == collection.id:
|
|
182
|
+
# logger.debug("try to find unregistered items in collection")
|
|
183
|
+
# collection_root_path = MPath.from_inp(
|
|
184
|
+
# existing_collection.get_self_href()
|
|
185
|
+
# ).parent
|
|
186
|
+
# for subpath in collection_root_path.ls():
|
|
187
|
+
# if subpath.is_directory():
|
|
188
|
+
# try:
|
|
189
|
+
# item = Item.from_file(
|
|
190
|
+
# subpath / subpath.with_suffix(".json").name
|
|
191
|
+
# )
|
|
192
|
+
# if item.id not in item_ids:
|
|
193
|
+
# logger.debug(
|
|
194
|
+
# "add existing item with id %s", item.id
|
|
195
|
+
# )
|
|
196
|
+
# items.append(item)
|
|
197
|
+
# item_ids.add(item.id)
|
|
198
|
+
# except FileNotFoundError:
|
|
199
|
+
# pass
|
|
200
|
+
# break
|
|
191
201
|
# create collection and copy metadata
|
|
192
202
|
logger.debug("create new collection")
|
|
193
203
|
out_collection = Collection(
|
|
194
|
-
id=
|
|
204
|
+
id=self.id,
|
|
195
205
|
extent=Extent.from_items(items),
|
|
196
|
-
description=
|
|
197
|
-
title=
|
|
198
|
-
stac_extensions=
|
|
199
|
-
license=
|
|
200
|
-
keywords=
|
|
201
|
-
providers=
|
|
202
|
-
summaries=
|
|
203
|
-
extra_fields=
|
|
206
|
+
description=self.description,
|
|
207
|
+
title=self.client.title,
|
|
208
|
+
stac_extensions=self.stac_extensions,
|
|
209
|
+
license=self.client.license,
|
|
210
|
+
keywords=self.client.keywords,
|
|
211
|
+
providers=self.client.providers,
|
|
212
|
+
summaries=self.client.summaries,
|
|
213
|
+
extra_fields=self.client.extra_fields,
|
|
204
214
|
catalog_type=CatalogType.SELF_CONTAINED,
|
|
205
215
|
)
|
|
206
216
|
|
|
@@ -222,14 +232,17 @@ class StaticCatalogWriterMixin(CatalogSearcher):
|
|
|
222
232
|
|
|
223
233
|
def filter_items(
|
|
224
234
|
items: Generator[Item, None, None],
|
|
225
|
-
|
|
226
|
-
max_cloud_cover: float = 100.0,
|
|
235
|
+
query: Optional[str] = None,
|
|
227
236
|
) -> Generator[Item, None, None]:
|
|
228
237
|
"""
|
|
229
238
|
Only for cloudcover now, this can and should be adapted for filter field and value
|
|
230
239
|
the field and value for the item filter would be defined in search.config.py corresponding configs
|
|
231
240
|
and passed down to the individual search approaches via said config and this Function.
|
|
232
241
|
"""
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
242
|
+
if query:
|
|
243
|
+
expr = Expr(query)
|
|
244
|
+
for item in items:
|
|
245
|
+
if expr.matches(item.properties):
|
|
246
|
+
yield item
|
|
247
|
+
else:
|
|
248
|
+
yield from items
|
mapchete_eo/search/config.py
CHANGED
|
@@ -1,23 +1,44 @@
|
|
|
1
|
-
from typing import Optional
|
|
1
|
+
from typing import Optional, Dict, Any
|
|
2
2
|
|
|
3
3
|
from mapchete.path import MPath, MPathLike
|
|
4
|
-
from pydantic import BaseModel
|
|
4
|
+
from pydantic import BaseModel, model_validator
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class StacSearchConfig(BaseModel):
|
|
8
8
|
max_cloud_cover: float = 100.0
|
|
9
|
+
query: Optional[str] = None
|
|
9
10
|
catalog_chunk_threshold: int = 10_000
|
|
10
11
|
catalog_chunk_zoom: int = 5
|
|
11
12
|
catalog_pagesize: int = 100
|
|
12
13
|
footprint_buffer: float = 0
|
|
13
14
|
|
|
15
|
+
@model_validator(mode="before")
|
|
16
|
+
def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
|
17
|
+
if "max_cloud_cover" in values: # pragma: no cover
|
|
18
|
+
raise DeprecationWarning(
|
|
19
|
+
"'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.",
|
|
20
|
+
)
|
|
21
|
+
return values
|
|
22
|
+
|
|
14
23
|
|
|
15
24
|
class StacStaticConfig(BaseModel):
|
|
16
|
-
|
|
25
|
+
@model_validator(mode="before")
|
|
26
|
+
def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
|
27
|
+
if "max_cloud_cover" in values: # pragma: no cover
|
|
28
|
+
raise DeprecationWarning(
|
|
29
|
+
"'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.",
|
|
30
|
+
)
|
|
31
|
+
return values
|
|
17
32
|
|
|
18
33
|
|
|
19
34
|
class UTMSearchConfig(BaseModel):
|
|
20
|
-
|
|
35
|
+
@model_validator(mode="before")
|
|
36
|
+
def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
|
37
|
+
if "max_cloud_cover" in values: # pragma: no cover
|
|
38
|
+
raise DeprecationWarning(
|
|
39
|
+
"'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.",
|
|
40
|
+
)
|
|
41
|
+
return values
|
|
21
42
|
|
|
22
43
|
sinergise_aws_collections: dict = dict(
|
|
23
44
|
S2_L2A=dict(
|
mapchete_eo/search/s2_mgrs.py
CHANGED
|
@@ -6,18 +6,17 @@ from functools import cached_property
|
|
|
6
6
|
from itertools import product
|
|
7
7
|
from typing import List, Literal, Optional, Tuple, Union
|
|
8
8
|
|
|
9
|
-
from mapchete.geometry import
|
|
9
|
+
from mapchete.geometry import (
|
|
10
|
+
reproject_geometry,
|
|
11
|
+
repair_antimeridian_geometry,
|
|
12
|
+
transform_to_latlon,
|
|
13
|
+
)
|
|
10
14
|
from mapchete.types import Bounds
|
|
11
15
|
from rasterio.crs import CRS
|
|
12
16
|
from shapely import prepare
|
|
13
17
|
from shapely.geometry import box, mapping, shape
|
|
14
18
|
from shapely.geometry.base import BaseGeometry
|
|
15
19
|
|
|
16
|
-
from mapchete_eo.geometry import (
|
|
17
|
-
bounds_to_geom,
|
|
18
|
-
repair_antimeridian_geometry,
|
|
19
|
-
transform_to_latlon,
|
|
20
|
-
)
|
|
21
20
|
|
|
22
21
|
LATLON_LEFT = -180
|
|
23
22
|
LATLON_RIGHT = 180
|
|
@@ -255,7 +254,7 @@ class S2Tile:
|
|
|
255
254
|
grid_square = tile_id[3:]
|
|
256
255
|
try:
|
|
257
256
|
int(utm_zone)
|
|
258
|
-
except Exception:
|
|
257
|
+
except Exception: # pragma: no cover
|
|
259
258
|
raise ValueError(f"invalid UTM zone given: {utm_zone}")
|
|
260
259
|
|
|
261
260
|
return MGRSCell(utm_zone, latitude_band).tile(grid_square)
|
|
@@ -268,7 +267,7 @@ class S2Tile:
|
|
|
268
267
|
def s2_tiles_from_bounds(
|
|
269
268
|
left: float, bottom: float, right: float, top: float
|
|
270
269
|
) -> List[S2Tile]:
|
|
271
|
-
bounds = Bounds(left, bottom, right, top)
|
|
270
|
+
bounds = Bounds(left, bottom, right, top, crs="EPSG:4326")
|
|
272
271
|
|
|
273
272
|
# determine zones in eastern-western direction
|
|
274
273
|
min_zone_idx = math.floor((left + LATLON_WIDTH_OFFSET) / UTM_ZONE_WIDTH)
|
|
@@ -291,7 +290,7 @@ def s2_tiles_from_bounds(
|
|
|
291
290
|
min_latitude_band_idx -= 1
|
|
292
291
|
max_latitude_band_idx += 1
|
|
293
292
|
|
|
294
|
-
aoi =
|
|
293
|
+
aoi = bounds.latlon_geometry()
|
|
295
294
|
prepare(aoi)
|
|
296
295
|
|
|
297
296
|
def tiles_generator():
|
|
@@ -1,19 +1,20 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
from datetime import datetime
|
|
3
5
|
from functools import cached_property
|
|
4
|
-
from typing import Any,
|
|
6
|
+
from typing import Any, Dict, Generator, Iterator, List, Optional, Set, Union
|
|
5
7
|
|
|
6
8
|
from mapchete import Timer
|
|
7
|
-
from mapchete.path import MPathLike
|
|
8
9
|
from mapchete.tile import BufferedTilePyramid
|
|
9
10
|
from mapchete.types import Bounds, BoundsLike
|
|
10
11
|
from pystac import Item
|
|
11
|
-
from pystac_client import Client
|
|
12
|
-
from shapely.geometry import shape
|
|
12
|
+
from pystac_client import Client, CollectionClient, ItemSearch
|
|
13
|
+
from shapely.geometry import shape, box
|
|
13
14
|
from shapely.geometry.base import BaseGeometry
|
|
14
15
|
|
|
15
16
|
from mapchete_eo.product import blacklist_products
|
|
16
|
-
from mapchete_eo.search.base import
|
|
17
|
+
from mapchete_eo.search.base import CollectionSearcher, StaticCollectionWriterMixin
|
|
17
18
|
from mapchete_eo.search.config import StacSearchConfig
|
|
18
19
|
from mapchete_eo.settings import mapchete_eo_settings
|
|
19
20
|
from mapchete_eo.types import TimeRange
|
|
@@ -21,8 +22,8 @@ from mapchete_eo.types import TimeRange
|
|
|
21
22
|
logger = logging.getLogger(__name__)
|
|
22
23
|
|
|
23
24
|
|
|
24
|
-
class
|
|
25
|
-
|
|
25
|
+
class STACSearchCollection(StaticCollectionWriterMixin, CollectionSearcher):
|
|
26
|
+
collection: str
|
|
26
27
|
blacklist: Set[str] = (
|
|
27
28
|
blacklist_products(mapchete_eo_settings.blacklist)
|
|
28
29
|
if mapchete_eo_settings.blacklist
|
|
@@ -30,27 +31,19 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
|
|
|
30
31
|
)
|
|
31
32
|
config_cls = StacSearchConfig
|
|
32
33
|
|
|
33
|
-
def __init__(
|
|
34
|
-
self,
|
|
35
|
-
collections: Optional[List[str]] = None,
|
|
36
|
-
stac_item_modifiers: Optional[List[Callable[[Item], Item]]] = None,
|
|
37
|
-
endpoint: Optional[MPathLike] = None,
|
|
38
|
-
):
|
|
39
|
-
if endpoint is not None:
|
|
40
|
-
self.endpoint = endpoint
|
|
41
|
-
if collections:
|
|
42
|
-
self.collections = collections
|
|
43
|
-
else: # pragma: no cover
|
|
44
|
-
raise ValueError("collections must be given")
|
|
45
|
-
self.stac_item_modifiers = stac_item_modifiers
|
|
46
|
-
|
|
47
34
|
@cached_property
|
|
48
|
-
def client(self) ->
|
|
49
|
-
return
|
|
35
|
+
def client(self) -> CollectionClient:
|
|
36
|
+
return CollectionClient.from_file(self.collection)
|
|
50
37
|
|
|
51
38
|
@cached_property
|
|
52
39
|
def eo_bands(self) -> List[str]:
|
|
53
|
-
|
|
40
|
+
item_assets = self.client.extra_fields.get("item_assets", {})
|
|
41
|
+
for v in item_assets.values():
|
|
42
|
+
if "eo:bands" in v and "data" in v.get("roles", []):
|
|
43
|
+
return ["eo:bands"]
|
|
44
|
+
else: # pragma: no cover
|
|
45
|
+
logger.debug("cannot find eo:bands definition from collections")
|
|
46
|
+
return []
|
|
54
47
|
|
|
55
48
|
@cached_property
|
|
56
49
|
def id(self) -> str:
|
|
@@ -69,27 +62,35 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
|
|
|
69
62
|
time: Optional[Union[TimeRange, List[TimeRange]]] = None,
|
|
70
63
|
bounds: Optional[BoundsLike] = None,
|
|
71
64
|
area: Optional[BaseGeometry] = None,
|
|
65
|
+
query: Optional[str] = None,
|
|
72
66
|
search_kwargs: Optional[Dict[str, Any]] = None,
|
|
73
67
|
) -> Generator[Item, None, None]:
|
|
74
68
|
config = self.config_cls(**search_kwargs or {})
|
|
75
69
|
if bounds:
|
|
76
70
|
bounds = Bounds.from_inp(bounds)
|
|
77
|
-
if time is None: # pragma: no cover
|
|
78
|
-
raise ValueError("time must be set")
|
|
79
71
|
if area is None and bounds is None: # pragma: no cover
|
|
80
72
|
raise ValueError("either bounds or area have to be given")
|
|
81
73
|
|
|
82
74
|
if area is not None and area.is_empty: # pragma: no cover
|
|
83
75
|
return
|
|
84
76
|
|
|
85
|
-
def _searches():
|
|
86
|
-
|
|
77
|
+
def _searches() -> Generator[ItemSearch, None, None]:
|
|
78
|
+
def _search_chunks(
|
|
79
|
+
time_range: Optional[TimeRange] = None,
|
|
80
|
+
bounds: Optional[BoundsLike] = None,
|
|
81
|
+
area: Optional[BaseGeometry] = None,
|
|
82
|
+
query: Optional[str] = None,
|
|
83
|
+
):
|
|
87
84
|
search = self._search(
|
|
88
|
-
time_range=time_range,
|
|
85
|
+
time_range=time_range,
|
|
86
|
+
bounds=bounds,
|
|
87
|
+
area=box(*area.bounds) if area else None,
|
|
88
|
+
query=query,
|
|
89
|
+
config=config,
|
|
89
90
|
)
|
|
90
91
|
logger.debug("found %s products", search.matched())
|
|
91
92
|
matched = search.matched() or 0
|
|
92
|
-
if matched > config.catalog_chunk_threshold:
|
|
93
|
+
if matched > config.catalog_chunk_threshold: # pragma: no cover
|
|
93
94
|
spatial_search_chunks = SpatialSearchChunks(
|
|
94
95
|
bounds=bounds,
|
|
95
96
|
area=area,
|
|
@@ -105,6 +106,7 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
|
|
|
105
106
|
with Timer() as duration:
|
|
106
107
|
chunk_search = self._search(
|
|
107
108
|
time_range=time_range,
|
|
109
|
+
query=query,
|
|
108
110
|
config=config,
|
|
109
111
|
**chunk_kwargs,
|
|
110
112
|
)
|
|
@@ -119,48 +121,60 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
|
|
|
119
121
|
else:
|
|
120
122
|
yield search
|
|
121
123
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
124
|
+
if time:
|
|
125
|
+
# search time range(s)
|
|
126
|
+
for time_range in time if isinstance(time, list) else [time]:
|
|
127
|
+
yield from _search_chunks(
|
|
128
|
+
time_range=time_range,
|
|
129
|
+
bounds=bounds,
|
|
130
|
+
area=area,
|
|
131
|
+
query=query,
|
|
132
|
+
)
|
|
133
|
+
else:
|
|
134
|
+
# don't apply temporal filter
|
|
135
|
+
yield from _search_chunks(
|
|
136
|
+
bounds=bounds,
|
|
137
|
+
area=area,
|
|
138
|
+
query=query,
|
|
139
|
+
)
|
|
130
140
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
else: # pragma: no cover
|
|
140
|
-
raise ValueError(f"cannot find collection {collection}")
|
|
141
|
-
else: # pragma: no cover
|
|
142
|
-
logger.debug("cannot find eo:bands definition from collections")
|
|
143
|
-
return []
|
|
141
|
+
for search in _searches():
|
|
142
|
+
for item in search.items():
|
|
143
|
+
if item.get_self_href() in self.blacklist: # pragma: no cover
|
|
144
|
+
logger.debug(
|
|
145
|
+
"item %s found in blacklist and skipping", item.get_self_href()
|
|
146
|
+
)
|
|
147
|
+
continue
|
|
148
|
+
yield item
|
|
144
149
|
|
|
145
150
|
@cached_property
|
|
146
151
|
def default_search_params(self):
|
|
147
152
|
return {
|
|
148
|
-
"collections": self.
|
|
153
|
+
"collections": [self.client],
|
|
149
154
|
"bbox": None,
|
|
150
155
|
"intersects": None,
|
|
151
156
|
}
|
|
152
157
|
|
|
158
|
+
@cached_property
|
|
159
|
+
def search_client(self) -> Client:
|
|
160
|
+
# looks weird, right?
|
|
161
|
+
#
|
|
162
|
+
# one would assume that directly returning self.client.get_root() would
|
|
163
|
+
# do the same but if we do so, it seems to ignore the "collections" parameter
|
|
164
|
+
# and thus query all collection available on that search endpoint.
|
|
165
|
+
#
|
|
166
|
+
# the only way to fix this, is to instantiate Client from scratch.
|
|
167
|
+
return Client.from_file(self.client.get_root().self_href)
|
|
168
|
+
|
|
153
169
|
def _search(
|
|
154
170
|
self,
|
|
155
171
|
time_range: Optional[TimeRange] = None,
|
|
156
172
|
bounds: Optional[Bounds] = None,
|
|
157
173
|
area: Optional[BaseGeometry] = None,
|
|
174
|
+
query: Optional[str] = None,
|
|
158
175
|
config: StacSearchConfig = StacSearchConfig(),
|
|
159
176
|
**kwargs,
|
|
160
|
-
):
|
|
161
|
-
if time_range is None: # pragma: no cover
|
|
162
|
-
raise ValueError("time_range not provided")
|
|
163
|
-
|
|
177
|
+
) -> ItemSearch:
|
|
164
178
|
if bounds is not None:
|
|
165
179
|
if shape(bounds).is_empty: # pragma: no cover
|
|
166
180
|
raise ValueError("bounds empty")
|
|
@@ -170,22 +184,29 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
|
|
|
170
184
|
raise ValueError("area empty")
|
|
171
185
|
kwargs.update(intersects=area)
|
|
172
186
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
187
|
+
if time_range:
|
|
188
|
+
start = (
|
|
189
|
+
time_range.start.date()
|
|
190
|
+
if isinstance(time_range.start, datetime)
|
|
191
|
+
else time_range.start
|
|
192
|
+
)
|
|
193
|
+
end = (
|
|
194
|
+
time_range.end.date()
|
|
195
|
+
if isinstance(time_range.end, datetime)
|
|
196
|
+
else time_range.end
|
|
197
|
+
)
|
|
198
|
+
search_params = dict(
|
|
199
|
+
self.default_search_params,
|
|
200
|
+
datetime=f"{start}/{end}",
|
|
201
|
+
query=[query] if query else None,
|
|
202
|
+
**kwargs,
|
|
203
|
+
)
|
|
204
|
+
else:
|
|
205
|
+
search_params = dict(
|
|
206
|
+
self.default_search_params,
|
|
207
|
+
query=[query] if query else None,
|
|
208
|
+
**kwargs,
|
|
209
|
+
)
|
|
189
210
|
if (
|
|
190
211
|
bounds is None
|
|
191
212
|
and area is None
|
|
@@ -194,14 +215,12 @@ class STACSearchCatalog(StaticCatalogWriterMixin, CatalogSearcher):
|
|
|
194
215
|
raise ValueError("no bounds or area given")
|
|
195
216
|
logger.debug("query catalog using params: %s", search_params)
|
|
196
217
|
with Timer() as duration:
|
|
197
|
-
result = self.
|
|
218
|
+
result = self.search_client.search(
|
|
219
|
+
**search_params, limit=config.catalog_pagesize
|
|
220
|
+
)
|
|
198
221
|
logger.debug("query took %s", str(duration))
|
|
199
222
|
return result
|
|
200
223
|
|
|
201
|
-
def get_collections(self):
|
|
202
|
-
for collection_name in self.collections:
|
|
203
|
-
yield self.client.get_collection(collection_name)
|
|
204
|
-
|
|
205
224
|
|
|
206
225
|
class SpatialSearchChunks:
|
|
207
226
|
bounds: Bounds
|