mapchete-eo 2026.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mapchete_eo/__init__.py +1 -0
- mapchete_eo/array/__init__.py +0 -0
- mapchete_eo/array/buffer.py +16 -0
- mapchete_eo/array/color.py +29 -0
- mapchete_eo/array/convert.py +163 -0
- mapchete_eo/base.py +653 -0
- mapchete_eo/blacklist.txt +175 -0
- mapchete_eo/cli/__init__.py +30 -0
- mapchete_eo/cli/bounds.py +22 -0
- mapchete_eo/cli/options_arguments.py +227 -0
- mapchete_eo/cli/s2_brdf.py +77 -0
- mapchete_eo/cli/s2_cat_results.py +130 -0
- mapchete_eo/cli/s2_find_broken_products.py +77 -0
- mapchete_eo/cli/s2_jp2_static_catalog.py +166 -0
- mapchete_eo/cli/s2_mask.py +71 -0
- mapchete_eo/cli/s2_mgrs.py +45 -0
- mapchete_eo/cli/s2_rgb.py +114 -0
- mapchete_eo/cli/s2_verify.py +129 -0
- mapchete_eo/cli/static_catalog.py +82 -0
- mapchete_eo/eostac.py +30 -0
- mapchete_eo/exceptions.py +87 -0
- mapchete_eo/image_operations/__init__.py +12 -0
- mapchete_eo/image_operations/blend_functions.py +579 -0
- mapchete_eo/image_operations/color_correction.py +136 -0
- mapchete_eo/image_operations/compositing.py +266 -0
- mapchete_eo/image_operations/dtype_scale.py +43 -0
- mapchete_eo/image_operations/fillnodata.py +130 -0
- mapchete_eo/image_operations/filters.py +319 -0
- mapchete_eo/image_operations/linear_normalization.py +81 -0
- mapchete_eo/image_operations/sigmoidal.py +114 -0
- mapchete_eo/io/__init__.py +37 -0
- mapchete_eo/io/assets.py +496 -0
- mapchete_eo/io/items.py +162 -0
- mapchete_eo/io/levelled_cubes.py +259 -0
- mapchete_eo/io/path.py +155 -0
- mapchete_eo/io/products.py +423 -0
- mapchete_eo/io/profiles.py +45 -0
- mapchete_eo/platforms/sentinel2/__init__.py +17 -0
- mapchete_eo/platforms/sentinel2/_mapper_registry.py +89 -0
- mapchete_eo/platforms/sentinel2/bandpass_adjustment.py +104 -0
- mapchete_eo/platforms/sentinel2/brdf/__init__.py +8 -0
- mapchete_eo/platforms/sentinel2/brdf/config.py +32 -0
- mapchete_eo/platforms/sentinel2/brdf/correction.py +260 -0
- mapchete_eo/platforms/sentinel2/brdf/hls.py +251 -0
- mapchete_eo/platforms/sentinel2/brdf/models.py +44 -0
- mapchete_eo/platforms/sentinel2/brdf/protocols.py +27 -0
- mapchete_eo/platforms/sentinel2/brdf/ross_thick.py +136 -0
- mapchete_eo/platforms/sentinel2/brdf/sun_angle_arrays.py +76 -0
- mapchete_eo/platforms/sentinel2/config.py +241 -0
- mapchete_eo/platforms/sentinel2/driver.py +43 -0
- mapchete_eo/platforms/sentinel2/masks.py +329 -0
- mapchete_eo/platforms/sentinel2/metadata_parser/__init__.py +6 -0
- mapchete_eo/platforms/sentinel2/metadata_parser/base.py +56 -0
- mapchete_eo/platforms/sentinel2/metadata_parser/default_path_mapper.py +135 -0
- mapchete_eo/platforms/sentinel2/metadata_parser/models.py +78 -0
- mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py +639 -0
- mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py +57 -0
- mapchete_eo/platforms/sentinel2/preconfigured_sources/guessers.py +108 -0
- mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py +171 -0
- mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py +217 -0
- mapchete_eo/platforms/sentinel2/preprocessing_tasks.py +50 -0
- mapchete_eo/platforms/sentinel2/processing_baseline.py +163 -0
- mapchete_eo/platforms/sentinel2/product.py +747 -0
- mapchete_eo/platforms/sentinel2/source.py +114 -0
- mapchete_eo/platforms/sentinel2/types.py +114 -0
- mapchete_eo/processes/__init__.py +0 -0
- mapchete_eo/processes/config.py +51 -0
- mapchete_eo/processes/dtype_scale.py +112 -0
- mapchete_eo/processes/eo_to_xarray.py +19 -0
- mapchete_eo/processes/merge_rasters.py +239 -0
- mapchete_eo/product.py +323 -0
- mapchete_eo/protocols.py +61 -0
- mapchete_eo/search/__init__.py +14 -0
- mapchete_eo/search/base.py +285 -0
- mapchete_eo/search/config.py +113 -0
- mapchete_eo/search/s2_mgrs.py +313 -0
- mapchete_eo/search/stac_search.py +278 -0
- mapchete_eo/search/stac_static.py +197 -0
- mapchete_eo/search/utm_search.py +251 -0
- mapchete_eo/settings.py +25 -0
- mapchete_eo/sort.py +60 -0
- mapchete_eo/source.py +109 -0
- mapchete_eo/time.py +62 -0
- mapchete_eo/types.py +76 -0
- mapchete_eo-2026.2.0.dist-info/METADATA +91 -0
- mapchete_eo-2026.2.0.dist-info/RECORD +89 -0
- mapchete_eo-2026.2.0.dist-info/WHEEL +4 -0
- mapchete_eo-2026.2.0.dist-info/entry_points.txt +11 -0
- mapchete_eo-2026.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
from contextlib import contextmanager
|
|
5
|
+
from typing import Optional, Dict, Any
|
|
6
|
+
|
|
7
|
+
from mapchete.path import MPath, MPathLike
|
|
8
|
+
from pydantic import BaseModel, model_validator
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class StacSearchConfig(BaseModel):
    """
    Settings for searching a STAC API.

    The meaning of ``query`` and ``footprint_buffer`` is not visible from this
    model alone; they are presumably consumed by the searcher implementations.
    """

    # Still declared as a field, but explicitly passing it is rejected by
    # ``deprecate_max_cloud_cover`` below.
    max_cloud_cover: float = 100.0
    query: Optional[str] = None
    # Number of matched items above which a search is split into spatial chunks.
    catalog_chunk_threshold: int = 10_000
    # Zoom level of the tile pyramid used to create those spatial chunks.
    catalog_chunk_zoom: int = 5
    # Page size ("limit") sent with each catalog request.
    catalog_pagesize: int = 100
    footprint_buffer: float = 0

    @model_validator(mode="before")
    @classmethod
    def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """
        Refuse raw input which still contains 'max_cloud_cover'.

        Runs before field validation. Note that the DeprecationWarning is
        *raised* as an exception, not emitted through the ``warnings`` module,
        so such input fails hard.

        Raises:
            DeprecationWarning: if 'max_cloud_cover' is part of the input.
        """
        if "max_cloud_cover" in values:  # pragma: no cover
            raise DeprecationWarning(
                "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.",
            )
        return values
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class StacStaticConfig(BaseModel):
    """
    Settings for searching a static STAC catalog.

    The model declares no fields of its own; it only rejects the legacy
    'max_cloud_cover' setting.
    """

    @model_validator(mode="before")
    @classmethod
    def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """
        Refuse raw input which still contains 'max_cloud_cover'.

        Runs before field validation. Note that the DeprecationWarning is
        *raised* as an exception, not emitted through the ``warnings`` module,
        so such input fails hard.

        Raises:
            DeprecationWarning: if 'max_cloud_cover' is part of the input.
        """
        if "max_cloud_cover" in values:  # pragma: no cover
            raise DeprecationWarning(
                "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.",
            )
        return values
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class UTMSearchConfig(BaseModel):
    """
    Settings for the UTM based search.

    Besides rejecting the legacy 'max_cloud_cover' setting, the model carries
    a table of known collections and an optional search index location.
    """

    # Known collections keyed by a short name; each entry holds the collection
    # id, the location of its STAC collection JSON and a storage endpoint.
    sinergise_aws_collections: dict = {
        "S2_L2A": {
            "id": "sentinel-s2-l2a",
            "path": MPath(
                "https://sentinel-s2-l2a-stac.s3.amazonaws.com/sentinel-s2-l2a.json"
            ),
            "endpoint": "s3://sentinel-s2-l2a-stac",
        },
        "S2_L1C": {
            "id": "sentinel-s2-l1c",
            "path": MPath(
                "https://sentinel-s2-l1c-stac.s3.amazonaws.com/sentinel-s2-l1c.json"
            ),
            "endpoint": "s3://sentinel-s2-l1c-stac",
        },
        "S1_GRD": {
            "id": "sentinel-s1-l1c",
            "path": MPath(
                "https://sentinel-s1-l1c-stac.s3.amazonaws.com/sentinel-s1-l1c.json"
            ),
            "endpoint": "s3://sentinel-s1-l1c-stac",
        },
    }
    # Optional path to a search index; how it is used is not visible here.
    search_index: Optional[MPathLike] = None

    @model_validator(mode="before")
    @classmethod
    def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """
        Refuse raw input which still contains 'max_cloud_cover'.

        Runs before field validation. Note that the DeprecationWarning is
        *raised* as an exception, not emitted through the ``warnings`` module,
        so such input fails hard.

        Raises:
            DeprecationWarning: if 'max_cloud_cover' is part of the input.
        """
        if "max_cloud_cover" in values:  # pragma: no cover
            raise DeprecationWarning(
                "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.",
            )
        return values
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@contextmanager
def patch_invalid_assets():
    """
    Temporarily make pystac tolerate assets which are not dictionaries.

    While the context is active, ``FileExtensionHooks.migrate`` is replaced by
    a wrapper which drops every asset whose value is not a dict (e.g. a plain
    string) before delegating to the original implementation. The original
    method is put back when the context exits, also on errors.

    If the pystac file extension cannot be imported, nothing is patched.
    """
    try:
        from pystac.extensions.file import FileExtensionHooks
    except ImportError:  # pragma: no cover
        yield
        return

    log = logging.getLogger(__name__)
    unpatched_migrate = FileExtensionHooks.migrate

    def _safe_migrate(self, obj, version, info):
        assets = obj["assets"] if "assets" in obj else None
        if isinstance(assets, dict):
            # collect first, delete afterwards: a dict must not change size
            # while it is being iterated
            malformed = [
                name for name, asset in assets.items() if not isinstance(asset, dict)
            ]
            for name in malformed:
                log.debug(
                    "Removing malformed asset '%s' (type %s) from item %s",
                    name,
                    type(assets[name]),
                    obj.get("id", "unknown"),
                )
            for name in malformed:
                del assets[name]

        return unpatched_migrate(self, obj, version, info)

    # swap in the tolerant version for the lifetime of the context only
    FileExtensionHooks.migrate = _safe_migrate
    try:
        yield
    finally:
        FileExtensionHooks.migrate = unpatched_migrate
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from functools import cached_property
|
|
6
|
+
from itertools import product
|
|
7
|
+
from typing import List, Literal, Optional, Tuple, Union
|
|
8
|
+
|
|
9
|
+
from mapchete.geometry import (
|
|
10
|
+
reproject_geometry,
|
|
11
|
+
repair_antimeridian_geometry,
|
|
12
|
+
transform_to_latlon,
|
|
13
|
+
)
|
|
14
|
+
from mapchete.types import Bounds
|
|
15
|
+
from rasterio.crs import CRS
|
|
16
|
+
from shapely import prepare
|
|
17
|
+
from shapely.geometry import box, mapping, shape
|
|
18
|
+
from shapely.geometry.base import BaseGeometry
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# longitude extent of the lat/lon system in degrees
LATLON_LEFT = -180
LATLON_RIGHT = 180
LATLON_WIDTH = LATLON_RIGHT - LATLON_LEFT
# added to a longitude so that counting starts at the left edge
LATLON_WIDTH_OFFSET = LATLON_WIDTH / 2

# latitude extent covered by the latitude bands in degrees
MIN_LATITUDE = -80.0
MAX_LATITUDE = 84
LATLON_HEIGHT = MAX_LATITUDE - MIN_LATITUDE
# added to a latitude so that counting starts at the lowest band
LATLON_HEIGHT_OFFSET = -MIN_LATITUDE

# UTM zones: width in degrees and zero-padded zone names ("01", "02", ...)
UTM_ZONE_WIDTH = 6
UTM_ZONES = [
    format(zone_number, "02d")
    for zone_number in range(1, LATLON_WIDTH // UTM_ZONE_WIDTH + 1)
]

# NOTE: each latitude band is 8° high except the most northern one ("X") is 12°
LATITUDE_BAND_HEIGHT = 8
LATITUDE_BANDS = [*"CDEFGHJKLMNPQRSTUVWX"]

# column names seem to span over three UTM zones (8 per zone)
COLUMNS_PER_ZONE = 8
SQUARE_COLUMNS = [*"ABCDEFGHJKLMNPQRSTUVWXYZ"]

# rows are weird. zone 01 starts at -80° with "M", then zone 02 with "S",
# then zone 03 with "M" and so on.
# Previously tried start letters were ["M", "S"] and ["B", "G"] (the latter a
# manual offset so the naming starts on the South Pole).
SQUARE_ROW_START = ["A", "F"]
SQUARE_ROWS = [*"ABCDEFGHJKLMNPQRSTUV"]

# tile size: 100 x 100 km
TILE_WIDTH_M = 100_000
TILE_HEIGHT_M = 100_000
# overlap for bottom and right
TILE_OVERLAP_M = 9_800

# source point of UTM zone from where tiles start
# (99_960.0 was previously tried as left value)
UTM_TILE_SOURCE_LEFT = 100_000
UTM_TILE_SOURCE_BOTTOM = 0
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class InvalidMGRSSquare(Exception):
    """Raised when a grid square cannot be resolved to a square index within its MGRS cell."""
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass(frozen=True)
class MGRSCell:
    """
    Grid cell identified by a UTM zone and a latitude band.

    Derived values are cached per instance. The cell can enumerate the tiles
    (grid squares) which intersect it.
    """

    utm_zone: str
    latitude_band: str

    def tiles(self) -> List[S2Tile]:
        """Return all tiles whose lat/lon geometry intersects this cell."""
        # TODO: this is incredibly slow
        candidates = (
            self.tile(
                grid_square=self._global_square_index_to_grid_square(col, row),
                column_index=col,
                row_index=row,
            )
            for col, row in self._global_square_indexes
        )
        return [
            candidate
            for candidate in candidates
            if candidate.latlon_geometry.intersects(self.latlon_geometry)
        ]

    def tile(
        self,
        grid_square: str,
        column_index: Optional[int] = None,
        row_index: Optional[int] = None,
    ) -> S2Tile:
        """
        Create the tile for a grid square of this cell.

        If either index is missing, both are looked up from the grid square
        name among the square indexes of this cell.

        Raises:
            InvalidMGRSSquare: if no square index matches the grid square.
        """
        if column_index is None or row_index is None:
            found = next(
                (
                    (col, row)
                    for col, row in self._global_square_indexes
                    if self._global_square_index_to_grid_square(col, row)
                    == grid_square
                ),
                None,
            )
            if found is None:  # pragma: no cover
                raise InvalidMGRSSquare(
                    f"global square index could not be determined for {self.utm_zone}{self.latitude_band}{grid_square}"
                )
            column_index, row_index = found

        return S2Tile(
            utm_zone=self.utm_zone,
            latitude_band=self.latitude_band,
            grid_square=grid_square,
            global_column_index=column_index,
            global_row_index=row_index,
        )

    @cached_property
    def _global_square_indexes(self) -> List[Tuple[int, int]]:
        """Return global (column, row) indexes of squares within MGRSCell."""
        # cell footprint in the UTM CRS of this cell
        projected = reproject_geometry(
            self.latlon_geometry, src_crs="EPSG:4326", dst_crs=self.crs
        )
        utm_bounds = Bounds(*projected.bounds)

        # columns are derived from the zone position, not from the geometry
        first_col = UTM_ZONES.index(self.utm_zone) * COLUMNS_PER_ZONE
        last_col = first_col + COLUMNS_PER_ZONE

        # rows are counted from the UTM zone bottom
        first_row = math.floor(
            (utm_bounds.bottom - UTM_TILE_SOURCE_BOTTOM) / TILE_HEIGHT_M
        )
        last_row = math.floor((utm_bounds.top - UTM_TILE_SOURCE_BOTTOM) / TILE_HEIGHT_M)

        # both upper limits are inclusive; columns vary slowest
        return [
            (col, row)
            for col in range(first_col, last_col + 1)
            for row in range(first_row, last_row + 1)
        ]

    def _global_square_index_to_grid_square(
        self, column_index: int, row_index: int
    ) -> str:
        """Translate a global (column, row) index into the two-letter square name."""
        # row lettering alternates its start ("A" or "F") from zone to zone
        zone_position = UTM_ZONES.index(self.utm_zone)
        first_row_letter = SQUARE_ROW_START[zone_position % len(SQUARE_ROW_START)]
        row_offset = SQUARE_ROWS.index(first_row_letter)

        column_letter = SQUARE_COLUMNS[column_index % len(SQUARE_COLUMNS)]
        row_letter = SQUARE_ROWS[(row_index + row_offset) % len(SQUARE_ROWS)]
        return f"{column_letter}{row_letter}"

    @cached_property
    def latlon_bounds(self) -> Bounds:
        """Bounds of the cell in degrees."""
        left = LATLON_LEFT + UTM_ZONE_WIDTH * UTM_ZONES.index(self.utm_zone)
        bottom = MIN_LATITUDE + LATITUDE_BAND_HEIGHT * LATITUDE_BANDS.index(
            self.latitude_band
        )
        # band "X" is 12 degrees high, all others share the regular height
        band_height = 12 if self.latitude_band == "X" else LATITUDE_BAND_HEIGHT
        return Bounds(left, bottom, left + UTM_ZONE_WIDTH, bottom + band_height)

    @cached_property
    def crs(self) -> CRS:
        """UTM CRS of the cell (EPSG:327xx for south, EPSG:326xx for north)."""
        hemisphere_code = {"S": "7", "N": "6"}[self.hemisphere]
        return CRS.from_string(f"EPSG:32{hemisphere_code}{self.utm_zone}")

    @cached_property
    def latlon_geometry(self) -> BaseGeometry:
        """Geometry built from the lat/lon bounds of the cell."""
        return shape(self.latlon_bounds)

    @cached_property
    def hemisphere(self) -> Union[Literal["S"], Literal["N"]]:
        """Hemisphere of the cell: bands before "N" are southern."""
        return "N" if self.latitude_band >= "N" else "S"
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
@dataclass(frozen=True)
class S2Tile:
    """
    Single tile (grid square) within an MGRS cell.

    A tile is identified by UTM zone, latitude band and grid square. The
    global column/row indexes are optional; when missing they are looked up
    from the grid square name through the parent cell.
    """

    utm_zone: str
    latitude_band: str
    grid_square: str
    global_column_index: Optional[int] = None
    global_row_index: Optional[int] = None

    @cached_property
    def crs(self) -> CRS:
        """UTM CRS of the tile (EPSG:327xx for south, EPSG:326xx for north)."""
        hemisphere_code = "7" if self.hemisphere == "S" else "6"
        return CRS.from_string(f"EPSG:32{hemisphere_code}{self.utm_zone}")

    @cached_property
    def bounds(self) -> Bounds:
        """Tile bounds in the tile CRS including the bottom and right overlap."""
        # vertical offsets are measured in tile heights, horizontal ones in widths
        base_bottom = UTM_TILE_SOURCE_BOTTOM + self.square_row * TILE_HEIGHT_M
        left = UTM_TILE_SOURCE_LEFT + self.square_column * TILE_WIDTH_M
        bottom = base_bottom - TILE_OVERLAP_M
        right = left + TILE_WIDTH_M + TILE_OVERLAP_M
        top = base_bottom + TILE_HEIGHT_M
        return Bounds(left, bottom, right, top)

    @cached_property
    def __geo_interface__(self) -> dict:
        """GeoJSON-like mapping of the tile bounds in the tile CRS."""
        return mapping(box(*self.bounds))

    @cached_property
    def mgrs_cell(self) -> MGRSCell:
        """Cell (UTM zone and latitude band) this tile belongs to."""
        return MGRSCell(self.utm_zone, self.latitude_band)

    @cached_property
    def latlon_geometry(self) -> BaseGeometry:
        """Tile geometry transformed to lat/lon and repaired at the antimeridian."""
        return repair_antimeridian_geometry(transform_to_latlon(shape(self), self.crs))

    @cached_property
    def latlon_bounds(self) -> Bounds:
        """Bounds of the lat/lon geometry."""
        return Bounds.from_inp(self.latlon_geometry)

    @cached_property
    def tile_id(self) -> str:
        """Tile ID composed of zone, band and grid square, e.g. "32TPT"."""
        return f"{self.utm_zone}{self.latitude_band}{self.grid_square}"

    @cached_property
    def square_column(self) -> int:
        """Column of the tile counted within its own UTM zone."""
        if self.global_column_index is None:
            return self._global_square_idx[0] % COLUMNS_PER_ZONE
        return self.global_column_index % COLUMNS_PER_ZONE

    @cached_property
    def square_row(self) -> int:
        """Global row index of the tile."""
        if self.global_row_index is None:
            return self._global_square_idx[1]
        return self.global_row_index

    @cached_property
    def _global_square_idx(self) -> Tuple[int, int]:
        """
        Global (column, row) index looked up from the grid square name.

        Raises:
            InvalidMGRSSquare: if the parent cell has no square with that name.
        """
        cell = self.mgrs_cell
        for column_index, row_index in cell._global_square_indexes:
            if (
                cell._global_square_index_to_grid_square(column_index, row_index)
                == self.grid_square
            ):
                return (column_index, row_index)
        raise InvalidMGRSSquare(  # pragma: no cover
            f"global square index could not be determined for {self.utm_zone}{self.latitude_band}{self.grid_square}"
        )

    @cached_property
    def hemisphere(self) -> Union[Literal["S"], Literal["N"]]:
        """Hemisphere of the tile: bands before "N" are southern."""
        return "S" if self.latitude_band < "N" else "N"

    @staticmethod
    def _strip_prefix(value: str, prefix: str) -> str:
        """Return value without a single leading prefix; unchanged if absent."""
        # str.lstrip() would treat the prefix as a *set of characters*
        return value[len(prefix):] if value.startswith(prefix) else value

    @staticmethod
    def from_tile_id(tile_id: str) -> S2Tile:
        """
        Create a tile from a tile ID such as "32TPT" or "T32TPT".

        Raises:
            ValueError: if the first two characters are not a number.
        """
        tile_id = S2Tile._strip_prefix(tile_id, "T")
        utm_zone = tile_id[:2]
        latitude_band = tile_id[2]
        grid_square = tile_id[3:]
        try:
            int(utm_zone)
        except ValueError as exc:  # pragma: no cover
            raise ValueError(f"invalid UTM zone given: {utm_zone}") from exc

        return MGRSCell(utm_zone, latitude_band).tile(grid_square)

    @staticmethod
    def from_grid_code(grid_code: str) -> S2Tile:
        """Create a tile from a grid code such as "MGRS-32TPT"."""
        return S2Tile.from_tile_id(S2Tile._strip_prefix(grid_code, "MGRS-"))
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def s2_tiles_from_bounds(
    left: float, bottom: float, right: float, top: float
) -> List[S2Tile]:
    """
    Return all tiles intersecting the given lat/lon bounds.

    The UTM zones and latitude bands touched by the bounds are determined
    first. One neighbor on every side is added so that overlapping tiles of
    adjacent cells are found as well. Every tile of the resulting cells is then
    tested against the area of interest.

    Args:
        left, bottom, right, top: bounds in degrees (EPSG:4326).
    """
    bounds = Bounds(left, bottom, right, top, crs="EPSG:4326")

    # zone indexes in west-east direction, padded by one neighbor per side
    min_zone_idx = math.floor((left + LATLON_WIDTH_OFFSET) / UTM_ZONE_WIDTH) - 1
    max_zone_idx = math.floor((right + LATLON_WIDTH_OFFSET) / UTM_ZONE_WIDTH) + 1

    # latitude band indexes, padded by one neighbor per side
    min_latitude_band_idx = (
        math.floor((bottom + LATLON_HEIGHT_OFFSET) / LATITUDE_BAND_HEIGHT) - 1
    )
    max_latitude_band_idx = (
        min(
            math.floor((top + LATLON_HEIGHT_OFFSET) / LATITUDE_BAND_HEIGHT),
            len(LATITUDE_BANDS),
        )
        + 1
    )

    # Zone indexes wrap around the globe. For very wide bounds the padded range
    # covers more indexes than there are zones, so the same zone would be
    # visited (and its tiles returned) more than once. Keep each zone once, in
    # order of first appearance.
    utm_zones = list(
        dict.fromkeys(
            UTM_ZONES[zone_idx % len(UTM_ZONES)]
            for zone_idx in range(min_zone_idx, max_zone_idx + 1)
        )
    )
    # clamp latitude index to range of 0 and number of latitude bands
    latitude_band_indexes = range(
        max(min_latitude_band_idx, 0),
        min(max_latitude_band_idx + 1, len(LATITUDE_BANDS)),
    )

    aoi = bounds.latlon_geometry()
    prepare(aoi)

    tiles: List[S2Tile] = []
    for utm_zone in utm_zones:
        for latitude_band_idx in latitude_band_indexes:
            cell = MGRSCell(
                utm_zone=utm_zone,
                latitude_band=LATITUDE_BANDS[latitude_band_idx],
            )
            tiles.extend(
                tile for tile in cell.tiles() if aoi.intersects(tile.latlon_geometry)
            )
    return tiles
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from functools import cached_property
|
|
6
|
+
from typing import Any, Dict, Generator, Iterator, List, Optional, Union
|
|
7
|
+
|
|
8
|
+
from mapchete import Timer
|
|
9
|
+
from mapchete.tile import BufferedTilePyramid
|
|
10
|
+
from mapchete.types import Bounds, BoundsLike
|
|
11
|
+
from pystac import Item
|
|
12
|
+
from pystac_client import Client, CollectionClient, ItemSearch
|
|
13
|
+
from shapely.geometry import shape, box
|
|
14
|
+
from shapely.geometry.base import BaseGeometry
|
|
15
|
+
|
|
16
|
+
from mapchete_eo.search.base import CollectionSearcher, StaticCollectionWriterMixin
|
|
17
|
+
from mapchete_eo.search.config import StacSearchConfig, patch_invalid_assets
|
|
18
|
+
from mapchete_eo.types import TimeRange
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class STACSearchCollection(StaticCollectionWriterMixin, CollectionSearcher):
    """
    Search implementation for STAC APIs.

    Searches are sent through a pystac_client ``Client``. If a search matches
    more items than ``catalog_chunk_threshold`` it is split into several
    spatially smaller searches.
    """

    collection: str
    config_cls = StacSearchConfig

    @cached_property
    def client(self) -> CollectionClient:
        """Collection client opened from ``self.collection``."""
        return CollectionClient.from_file(self.collection)

    @cached_property
    def eo_bands(self) -> List[str]:
        """
        Return ["eo:bands"] if any data asset of the collection defines it.

        An empty list is returned if no item asset with role "data" carries an
        "eo:bands" entry.
        """
        item_assets = self.client.extra_fields.get("item_assets", {})
        for asset_definition in item_assets.values():
            if "eo:bands" in asset_definition and "data" in asset_definition.get(
                "roles", []
            ):
                # NOTE(review): this returns the literal key, not the band
                # definitions stored under it -- confirm this is intended.
                return ["eo:bands"]
        logger.debug(  # pragma: no cover
            "cannot find eo:bands definition from collections"
        )
        return []  # pragma: no cover

    def search(
        self,
        time: Optional[Union[TimeRange, List[TimeRange]]] = None,
        bounds: Optional[BoundsLike] = None,
        area: Optional[BaseGeometry] = None,
        query: Optional[str] = None,
        search_kwargs: Optional[Dict[str, Any]] = None,
    ) -> Generator[Item, None, None]:
        """
        Yield all items matching the given spatial and temporal filters.

        Args:
            time: one time range or a list of time ranges; no temporal filter
                is applied if omitted.
            bounds: search bounds; takes precedence over ``area``.
            area: search area; only its bounding box is sent with the initial
                search.
            query: query expression passed on to the catalog.
            search_kwargs: values used to build the search configuration.

        Raises:
            ValueError: if neither bounds nor area are given.
        """
        config = self.config_cls(**search_kwargs or {})
        if bounds:
            bounds = Bounds.from_inp(bounds)
        if area is None and bounds is None:  # pragma: no cover
            raise ValueError("either bounds or area have to be given")

        if area is not None and area.is_empty:  # pragma: no cover
            return

        def _searches() -> Generator[ItemSearch, None, None]:
            def _search_chunks(
                time_range: Optional[TimeRange] = None,
                bounds: Optional[BoundsLike] = None,
                area: Optional[BaseGeometry] = None,
                query: Optional[str] = None,
            ):
                search = self._search(
                    time_range=time_range,
                    bounds=bounds,
                    area=box(*area.bounds) if area else None,
                    query=query,
                    config=config,
                )
                # ask for the match count only once; it may be None
                reported_matches = search.matched()
                logger.debug("found %s products", reported_matches)
                matched = reported_matches or 0
                if matched > config.catalog_chunk_threshold:  # pragma: no cover
                    spatial_search_chunks = SpatialSearchChunks(
                        bounds=bounds,
                        area=area,
                        grid="geodetic",
                        zoom=config.catalog_chunk_zoom,
                    )
                    logger.debug(
                        "too many products (%s), query catalog in %s chunks",
                        matched,
                        len(spatial_search_chunks),
                    )
                    for counter, chunk_kwargs in enumerate(spatial_search_chunks, 1):
                        with Timer() as duration:
                            chunk_search = self._search(
                                time_range=time_range,
                                query=query,
                                config=config,
                                **chunk_kwargs,
                            )
                        yield chunk_search
                        logger.debug(
                            "returned chunk %s/%s (%s items) in %s",
                            counter,
                            len(spatial_search_chunks),
                            chunk_search.matched(),
                            duration,
                        )
                else:
                    yield search

            if time:
                # search time range(s)
                for time_range in time if isinstance(time, list) else [time]:
                    yield from _search_chunks(
                        time_range=time_range,
                        bounds=bounds,
                        area=area,
                        query=query,
                    )
            else:
                # don't apply temporal filter
                yield from _search_chunks(
                    bounds=bounds,
                    area=area,
                    query=query,
                )

        with patch_invalid_assets():
            for search in _searches():
                for item in search.items():
                    if item.get_self_href() in self.blacklist:  # pragma: no cover
                        logger.debug(
                            "item %s found in blacklist and skipping",
                            item.get_self_href(),
                        )
                        continue
                    yield item

    @cached_property
    def default_search_params(self):
        """Search parameters shared by every query of this collection."""
        return {
            "collections": [self.client],
            "bbox": None,
            "intersects": None,
        }

    @cached_property
    def search_client(self) -> Client:
        """Client of the catalog root used to run the searches."""
        # looks weird, right?
        #
        # one would assume that directly returning self.client.get_root() would
        # do the same but if we do so, it seems to ignore the "collections" parameter
        # and thus query all collection available on that search endpoint.
        #
        # the only way to fix this, is to instantiate Client from scratch.
        return Client.from_file(self.client.get_root().self_href)

    def _search(
        self,
        time_range: Optional[TimeRange] = None,
        bounds: Optional[Bounds] = None,
        area: Optional[BaseGeometry] = None,
        query: Optional[str] = None,
        config: Optional[StacSearchConfig] = None,
        **kwargs,
    ) -> ItemSearch:
        """
        Run a single catalog search and return the ItemSearch object.

        Args:
            time_range: optional temporal filter; only the date part of
                datetime values is used.
            bounds: sent as "bbox"; takes precedence over ``area``.
            area: sent as "intersects" if no bounds are given.
            query: query expression, wrapped into a list for the client.
            config: search configuration; a default one is created if omitted.
            **kwargs: further search parameters, e.g. "bbox" or "intersects".

        Raises:
            ValueError: if bounds or area are empty or no spatial filter is
                given at all.
        """
        if config is None:
            # create the default per call instead of once at definition time
            config = StacSearchConfig()

        if bounds is not None:
            if shape(bounds).is_empty:  # pragma: no cover
                raise ValueError("bounds empty")
            kwargs.update(bbox=",".join(map(str, bounds)))
        elif area is not None:
            if area.is_empty:  # pragma: no cover
                raise ValueError("area empty")
            kwargs.update(intersects=area)

        temporal_filter = {}
        if time_range:
            start = (
                time_range.start.date()
                if isinstance(time_range.start, datetime)
                else time_range.start
            )
            end = (
                time_range.end.date()
                if isinstance(time_range.end, datetime)
                else time_range.end
            )
            temporal_filter = {"datetime": f"{start}/{end}"}

        search_params = dict(
            self.default_search_params,
            **temporal_filter,
            query=[query] if query else None,
            **kwargs,
        )
        if (
            bounds is None
            and area is None
            and kwargs.get("bbox", kwargs.get("intersects")) is None
        ):  # pragma: no cover
            raise ValueError("no bounds or area given")
        logger.debug("query catalog using params: %s", search_params)
        with Timer() as duration:
            result = self.search_client.search(
                **search_params, limit=config.catalog_pagesize
            )
        logger.debug("query took %s", str(duration))
        return result
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
class SpatialSearchChunks:
    """
    Split spatial search areas into smaller chunks for large queries.

    Either bounds or an area are cut along the tiles of a tile pyramid at a
    fixed zoom level. Iterating yields one keyword dictionary per chunk, keyed
    by "bbox" (bounds) or "intersects" (area).
    """

    # exactly one of bounds/area is set, the other one is None
    bounds: Optional[Bounds]
    area: Optional[BaseGeometry]
    search_kw: str
    tile_pyramid: BufferedTilePyramid
    zoom: int

    def __init__(
        self,
        bounds: Optional[BoundsLike] = None,
        area: Optional[BaseGeometry] = None,
        zoom: int = 6,
        grid: str = "geodetic",
    ):
        """
        Args:
            bounds: bounds to split; takes precedence over ``area``.
            area: geometry to split if no bounds are given.
            zoom: zoom level of the tile pyramid used for splitting.
            grid: grid name of the tile pyramid.

        Raises:
            ValueError: if neither bounds nor area are given.
        """
        if bounds is not None:
            self.bounds = Bounds.from_inp(bounds)
            self.area = None
            self.search_kw = "bbox"
        elif area is not None:
            self.bounds = None
            self.area = area
            self.search_kw = "intersects"
        else:  # pragma: no cover
            raise ValueError("either area or bounds have to be given")
        self.zoom = zoom
        self.tile_pyramid = BufferedTilePyramid(grid)

    @cached_property
    def _chunks(self) -> List[Union[List[float], BaseGeometry]]:
        """Chunks as coordinate lists (bounds) or geometries (area)."""
        if self.bounds is not None:
            bounds = self.bounds
            # if bounds cross the antimeridian, snap them to CRS bounds
            if bounds.left < self.tile_pyramid.left:
                logger.warning("snap left bounds value back to CRS bounds")
                bounds = Bounds(
                    self.tile_pyramid.left,
                    bounds.bottom,
                    bounds.right,
                    bounds.top,
                )
            if bounds.right > self.tile_pyramid.right:
                logger.warning("snap right bounds value back to CRS bounds")
                # build on the possibly already snapped bounds so that a
                # corrected left value is not thrown away again
                bounds = Bounds(
                    bounds.left,
                    bounds.bottom,
                    self.tile_pyramid.right,
                    bounds.top,
                )
            return [
                list(Bounds.from_inp(tile.bbox.intersection(shape(bounds))))
                for tile in self.tile_pyramid.tiles_from_bounds(bounds, zoom=self.zoom)
            ]

        return [
            tile.bbox.intersection(self.area)
            for tile in self.tile_pyramid.tiles_from_geom(self.area, zoom=self.zoom)
        ]

    def __len__(self) -> int:
        """Number of chunks."""
        return len(self._chunks)

    def __iter__(self) -> Iterator[dict]:
        """Iterate over one search keyword dictionary per chunk."""
        return iter([{self.search_kw: chunk} for chunk in self._chunks])
|