mapchete-eo 2026.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. mapchete_eo/__init__.py +1 -0
  2. mapchete_eo/array/__init__.py +0 -0
  3. mapchete_eo/array/buffer.py +16 -0
  4. mapchete_eo/array/color.py +29 -0
  5. mapchete_eo/array/convert.py +163 -0
  6. mapchete_eo/base.py +653 -0
  7. mapchete_eo/blacklist.txt +175 -0
  8. mapchete_eo/cli/__init__.py +30 -0
  9. mapchete_eo/cli/bounds.py +22 -0
  10. mapchete_eo/cli/options_arguments.py +227 -0
  11. mapchete_eo/cli/s2_brdf.py +77 -0
  12. mapchete_eo/cli/s2_cat_results.py +130 -0
  13. mapchete_eo/cli/s2_find_broken_products.py +77 -0
  14. mapchete_eo/cli/s2_jp2_static_catalog.py +166 -0
  15. mapchete_eo/cli/s2_mask.py +71 -0
  16. mapchete_eo/cli/s2_mgrs.py +45 -0
  17. mapchete_eo/cli/s2_rgb.py +114 -0
  18. mapchete_eo/cli/s2_verify.py +129 -0
  19. mapchete_eo/cli/static_catalog.py +82 -0
  20. mapchete_eo/eostac.py +30 -0
  21. mapchete_eo/exceptions.py +87 -0
  22. mapchete_eo/image_operations/__init__.py +12 -0
  23. mapchete_eo/image_operations/blend_functions.py +579 -0
  24. mapchete_eo/image_operations/color_correction.py +136 -0
  25. mapchete_eo/image_operations/compositing.py +266 -0
  26. mapchete_eo/image_operations/dtype_scale.py +43 -0
  27. mapchete_eo/image_operations/fillnodata.py +130 -0
  28. mapchete_eo/image_operations/filters.py +319 -0
  29. mapchete_eo/image_operations/linear_normalization.py +81 -0
  30. mapchete_eo/image_operations/sigmoidal.py +114 -0
  31. mapchete_eo/io/__init__.py +37 -0
  32. mapchete_eo/io/assets.py +496 -0
  33. mapchete_eo/io/items.py +162 -0
  34. mapchete_eo/io/levelled_cubes.py +259 -0
  35. mapchete_eo/io/path.py +155 -0
  36. mapchete_eo/io/products.py +423 -0
  37. mapchete_eo/io/profiles.py +45 -0
  38. mapchete_eo/platforms/sentinel2/__init__.py +17 -0
  39. mapchete_eo/platforms/sentinel2/_mapper_registry.py +89 -0
  40. mapchete_eo/platforms/sentinel2/bandpass_adjustment.py +104 -0
  41. mapchete_eo/platforms/sentinel2/brdf/__init__.py +8 -0
  42. mapchete_eo/platforms/sentinel2/brdf/config.py +32 -0
  43. mapchete_eo/platforms/sentinel2/brdf/correction.py +260 -0
  44. mapchete_eo/platforms/sentinel2/brdf/hls.py +251 -0
  45. mapchete_eo/platforms/sentinel2/brdf/models.py +44 -0
  46. mapchete_eo/platforms/sentinel2/brdf/protocols.py +27 -0
  47. mapchete_eo/platforms/sentinel2/brdf/ross_thick.py +136 -0
  48. mapchete_eo/platforms/sentinel2/brdf/sun_angle_arrays.py +76 -0
  49. mapchete_eo/platforms/sentinel2/config.py +241 -0
  50. mapchete_eo/platforms/sentinel2/driver.py +43 -0
  51. mapchete_eo/platforms/sentinel2/masks.py +329 -0
  52. mapchete_eo/platforms/sentinel2/metadata_parser/__init__.py +6 -0
  53. mapchete_eo/platforms/sentinel2/metadata_parser/base.py +56 -0
  54. mapchete_eo/platforms/sentinel2/metadata_parser/default_path_mapper.py +135 -0
  55. mapchete_eo/platforms/sentinel2/metadata_parser/models.py +78 -0
  56. mapchete_eo/platforms/sentinel2/metadata_parser/s2metadata.py +639 -0
  57. mapchete_eo/platforms/sentinel2/preconfigured_sources/__init__.py +57 -0
  58. mapchete_eo/platforms/sentinel2/preconfigured_sources/guessers.py +108 -0
  59. mapchete_eo/platforms/sentinel2/preconfigured_sources/item_mappers.py +171 -0
  60. mapchete_eo/platforms/sentinel2/preconfigured_sources/metadata_xml_mappers.py +217 -0
  61. mapchete_eo/platforms/sentinel2/preprocessing_tasks.py +50 -0
  62. mapchete_eo/platforms/sentinel2/processing_baseline.py +163 -0
  63. mapchete_eo/platforms/sentinel2/product.py +747 -0
  64. mapchete_eo/platforms/sentinel2/source.py +114 -0
  65. mapchete_eo/platforms/sentinel2/types.py +114 -0
  66. mapchete_eo/processes/__init__.py +0 -0
  67. mapchete_eo/processes/config.py +51 -0
  68. mapchete_eo/processes/dtype_scale.py +112 -0
  69. mapchete_eo/processes/eo_to_xarray.py +19 -0
  70. mapchete_eo/processes/merge_rasters.py +239 -0
  71. mapchete_eo/product.py +323 -0
  72. mapchete_eo/protocols.py +61 -0
  73. mapchete_eo/search/__init__.py +14 -0
  74. mapchete_eo/search/base.py +285 -0
  75. mapchete_eo/search/config.py +113 -0
  76. mapchete_eo/search/s2_mgrs.py +313 -0
  77. mapchete_eo/search/stac_search.py +278 -0
  78. mapchete_eo/search/stac_static.py +197 -0
  79. mapchete_eo/search/utm_search.py +251 -0
  80. mapchete_eo/settings.py +25 -0
  81. mapchete_eo/sort.py +60 -0
  82. mapchete_eo/source.py +109 -0
  83. mapchete_eo/time.py +62 -0
  84. mapchete_eo/types.py +76 -0
  85. mapchete_eo-2026.2.0.dist-info/METADATA +91 -0
  86. mapchete_eo-2026.2.0.dist-info/RECORD +89 -0
  87. mapchete_eo-2026.2.0.dist-info/WHEEL +4 -0
  88. mapchete_eo-2026.2.0.dist-info/entry_points.txt +11 -0
  89. mapchete_eo-2026.2.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,113 @@
1
+ import logging
2
+
3
+
4
+ from contextlib import contextmanager
5
+ from typing import Optional, Dict, Any
6
+
7
+ from mapchete.path import MPath, MPathLike
8
+ from pydantic import BaseModel, model_validator
9
+
10
+
11
class StacSearchConfig(BaseModel):
    """Settings for searches against a STAC API."""

    # NOTE(review): passing this key is rejected by the validator below, so only
    # the default can ever be in effect -- confirm whether the field is still needed.
    max_cloud_cover: float = 100.0
    query: Optional[str] = None
    # number of matched items above which the search is split into spatial chunks
    catalog_chunk_threshold: int = 10_000
    # zoom level of the tile grid used to build the spatial chunks
    catalog_chunk_zoom: int = 5
    # page size handed to the STAC API as "limit"
    catalog_pagesize: int = 100
    footprint_buffer: float = 0

    @model_validator(mode="before")
    @classmethod
    def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """
        Reject configurations which still pass 'max_cloud_cover'.

        Runs before field validation on the raw input. Input which is not a
        dict is passed through untouched.

        Raises:
            DeprecationWarning: if 'max_cloud_cover' is among the given keys.
        """
        if isinstance(values, dict) and "max_cloud_cover" in values:  # pragma: no cover
            raise DeprecationWarning(
                "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.",
            )
        return values
26
+
27
+
28
class StacStaticConfig(BaseModel):
    """Settings for searches in static STAC catalogs (currently without fields)."""

    @model_validator(mode="before")
    @classmethod
    def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """
        Reject configurations which still pass 'max_cloud_cover'.

        Runs before field validation on the raw input. Input which is not a
        dict is passed through untouched.

        Raises:
            DeprecationWarning: if 'max_cloud_cover' is among the given keys.
        """
        if isinstance(values, dict) and "max_cloud_cover" in values:  # pragma: no cover
            raise DeprecationWarning(
                "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.",
            )
        return values
36
+
37
+
38
class UTMSearchConfig(BaseModel):
    """Settings for the UTM based search including the known AWS STAC collections."""

    @model_validator(mode="before")
    @classmethod
    def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """
        Reject configurations which still pass 'max_cloud_cover'.

        Runs before field validation on the raw input. Input which is not a
        dict is passed through untouched.

        Raises:
            DeprecationWarning: if 'max_cloud_cover' is among the given keys.
        """
        if isinstance(values, dict) and "max_cloud_cover" in values:  # pragma: no cover
            raise DeprecationWarning(
                "'max_cloud_cover' will be deprecated soon. Please use 'eo:cloud_cover<=...' in the source 'query' field.",
            )
        return values

    # per collection: STAC collection id, path of the collection JSON and the
    # bucket endpoint
    sinergise_aws_collections: dict = {
        "S2_L2A": {
            "id": "sentinel-s2-l2a",
            "path": MPath(
                "https://sentinel-s2-l2a-stac.s3.amazonaws.com/sentinel-s2-l2a.json"
            ),
            "endpoint": "s3://sentinel-s2-l2a-stac",
        },
        "S2_L1C": {
            "id": "sentinel-s2-l1c",
            "path": MPath(
                "https://sentinel-s2-l1c-stac.s3.amazonaws.com/sentinel-s2-l1c.json"
            ),
            "endpoint": "s3://sentinel-s2-l1c-stac",
        },
        "S1_GRD": {
            "id": "sentinel-s1-l1c",
            "path": MPath(
                "https://sentinel-s1-l1c-stac.s3.amazonaws.com/sentinel-s1-l1c.json"
            ),
            "endpoint": "s3://sentinel-s1-l1c-stac",
        },
    }
    search_index: Optional[MPathLike] = None
71
+
72
+
73
@contextmanager
def patch_invalid_assets():
    """
    Temporarily make pystac tolerate malformed assets (strings instead of dicts).

    While the context is active, ``FileExtensionHooks.migrate`` is replaced by a
    wrapper which removes every entry of ``obj["assets"]`` that is not a dict
    before delegating to the original implementation. The original method is
    restored on exit, also if the wrapped code raises. If the pystac file
    extension cannot be imported, nothing is patched.
    """
    try:
        from pystac.extensions.file import FileExtensionHooks
    except ImportError:  # pragma: no cover
        yield
        return

    log = logging.getLogger(__name__)
    unpatched_migrate = FileExtensionHooks.migrate

    def _safe_migrate(self, obj, version, info):
        assets = obj["assets"] if "assets" in obj else None
        if isinstance(assets, dict):
            # collect keys first, the dict must not change size while iterating
            malformed = [
                key for key, asset in assets.items() if not isinstance(asset, dict)
            ]
            for key in malformed:
                log.debug(
                    "Removing malformed asset '%s' (type %s) from item %s",
                    key,
                    type(assets[key]),
                    obj.get("id", "unknown"),
                )
            for key in malformed:
                del assets[key]

        return unpatched_migrate(self, obj, version, info)

    # swap in the tolerant version for the lifetime of the context
    FileExtensionHooks.migrate = _safe_migrate
    try:
        yield
    finally:
        # always hand back the original implementation
        FileExtensionHooks.migrate = unpatched_migrate
@@ -0,0 +1,313 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ from dataclasses import dataclass
5
+ from functools import cached_property
6
+ from itertools import product
7
+ from typing import List, Literal, Optional, Tuple, Union
8
+
9
+ from mapchete.geometry import (
10
+ reproject_geometry,
11
+ repair_antimeridian_geometry,
12
+ transform_to_latlon,
13
+ )
14
+ from mapchete.types import Bounds
15
+ from rasterio.crs import CRS
16
+ from shapely import prepare
17
+ from shapely.geometry import box, mapping, shape
18
+ from shapely.geometry.base import BaseGeometry
19
+
20
+
21
# longitude extent of the lat/lon grid in degrees
LATLON_LEFT = -180
LATLON_RIGHT = 180
LATLON_WIDTH = LATLON_RIGHT - LATLON_LEFT
# added to a longitude to get its offset from LATLON_LEFT
LATLON_WIDTH_OFFSET = LATLON_WIDTH / 2
# latitude extent covered by the latitude bands in degrees
MIN_LATITUDE = -80.0
MAX_LATITUDE = 84
LATLON_HEIGHT = MAX_LATITUDE - MIN_LATITUDE
# added to a latitude to get its offset from MIN_LATITUDE
LATLON_HEIGHT_OFFSET = -MIN_LATITUDE

# width in degrees
UTM_ZONE_WIDTH = 6
# zero-padded zone names "01" to "60"
UTM_ZONES = [f"{ii:02d}" for ii in range(1, LATLON_WIDTH // UTM_ZONE_WIDTH + 1)]

# NOTE: each latitude band is 8° high except the most northern one ("X") is 12°
LATITUDE_BAND_HEIGHT = 8
# band letters from south to north (no "I" and "O")
LATITUDE_BANDS = list("CDEFGHJKLMNPQRSTUVWX")

# column names seem to span over three UTM zones (8 per zone)
COLUMNS_PER_ZONE = 8
SQUARE_COLUMNS = list("ABCDEFGHJKLMNPQRSTUVWXYZ")

# rows are weird. zone 01 starts at -80° with "M", then zone 02 with "S", then zone 03 with "M" and so on
# SQUARE_ROW_START = ["M", "S"]
# SQUARE_ROW_START = ["B", "G"]  # manual offset so the naming starts on the South Pole
# row letter at the bottom of a zone, alternating between consecutive zones
SQUARE_ROW_START = ["A", "F"]
SQUARE_ROWS = list("ABCDEFGHJKLMNPQRSTUV")

# 100 x 100 km
TILE_WIDTH_M = 100_000
TILE_HEIGHT_M = 100_000
# overlap for bottom and right
TILE_OVERLAP_M = 9_800

# source point of UTM zone from where tiles start
# UTM_TILE_SOURCE_LEFT = 99_960.0
UTM_TILE_SOURCE_LEFT = 100_000
UTM_TILE_SOURCE_BOTTOM = 0
58
+
59
+
60
class InvalidMGRSSquare(Exception):
    """Raised when an invalid square index has been given, i.e. when no global
    column/row index can be found for a grid square name."""
62
+
63
+
64
@dataclass(frozen=True)
class MGRSCell:
    """
    Grid cell defined by one UTM zone and one latitude band.

    Attributes are the zero-padded UTM zone name (e.g. "32") and the latitude
    band letter (e.g. "T").
    """

    utm_zone: str
    latitude_band: str

    def tiles(self) -> List[S2Tile]:
        """Return all tiles whose lat/lon geometry intersects with this cell."""
        # TODO: this is incredibly slow
        intersecting = []
        for col, row in self._global_square_indexes:
            candidate = self.tile(
                grid_square=self._global_square_index_to_grid_square(col, row),
                column_index=col,
                row_index=row,
            )
            if candidate.latlon_geometry.intersects(self.latlon_geometry):
                intersecting.append(candidate)
        return intersecting

    def tile(
        self,
        grid_square: str,
        column_index: Optional[int] = None,
        row_index: Optional[int] = None,
    ) -> S2Tile:
        """
        Return the tile of this cell for a grid square name.

        If either index is missing, both are looked up from the grid square name.

        Raises:
            InvalidMGRSSquare: if no square of this cell has the given name.
        """
        if column_index is None or row_index is None:
            located = next(
                (
                    (col, row)
                    for col, row in self._global_square_indexes
                    if self._global_square_index_to_grid_square(col, row)
                    == grid_square
                ),
                None,
            )
            if located is None:  # pragma: no cover
                raise InvalidMGRSSquare(
                    f"global square index could not be determined for {self.utm_zone}{self.latitude_band}{grid_square}"
                )
            column_index, row_index = located

        return S2Tile(
            utm_zone=self.utm_zone,
            latitude_band=self.latitude_band,
            grid_square=grid_square,
            global_column_index=column_index,
            global_row_index=row_index,
        )

    @cached_property
    def _global_square_indexes(self) -> List[Tuple[int, int]]:
        """
        Return global (column, row) indexes of squares within MGRSCell.

        NOTE(review): the column range is inclusive on both ends and therefore
        yields COLUMNS_PER_ZONE + 1 columns per zone, while
        S2Tile.square_column wraps the index modulo COLUMNS_PER_ZONE --
        confirm that the extra column is intended.
        """
        # cell extent expressed in the UTM CRS of this cell
        cell_in_utm = reproject_geometry(
            self.latlon_geometry, src_crs="EPSG:4326", dst_crs=self.crs
        )
        utm_bounds = Bounds(*cell_in_utm.bounds)

        # columns are numbered continuously over all UTM zones
        first_col = UTM_ZONES.index(self.utm_zone) * COLUMNS_PER_ZONE
        last_col = first_col + COLUMNS_PER_ZONE

        # rows are counted from the UTM zone bottom
        first_row = math.floor(
            (utm_bounds.bottom - UTM_TILE_SOURCE_BOTTOM) / TILE_HEIGHT_M
        )
        last_row = math.floor((utm_bounds.top - UTM_TILE_SOURCE_BOTTOM) / TILE_HEIGHT_M)

        return [
            (col, row)
            for col in range(first_col, last_col + 1)
            for row in range(first_row, last_row + 1)
        ]

    def _global_square_index_to_grid_square(
        self, column_index: int, row_index: int
    ) -> str:
        """Translate global column/row indexes into the two-letter square name."""
        # the bottom row letter alternates between zones ("A" or "F")
        zone_position = UTM_ZONES.index(self.utm_zone)
        first_row_letter = SQUARE_ROW_START[zone_position % len(SQUARE_ROW_START)]
        row_shift = SQUARE_ROWS.index(first_row_letter)

        column_letter = SQUARE_COLUMNS[column_index % len(SQUARE_COLUMNS)]
        row_letter = SQUARE_ROWS[(row_index + row_shift) % len(SQUARE_ROWS)]
        return f"{column_letter}{row_letter}"

    @cached_property
    def latlon_bounds(self) -> Bounds:
        """Cell bounds in degrees derived from zone and latitude band position."""
        zone_position = UTM_ZONES.index(self.utm_zone)
        band_position = LATITUDE_BANDS.index(self.latitude_band)
        left = LATLON_LEFT + UTM_ZONE_WIDTH * zone_position
        bottom = MIN_LATITUDE + LATITUDE_BAND_HEIGHT * band_position
        # the northernmost band "X" is higher than all the others
        band_height = 12 if self.latitude_band == "X" else LATITUDE_BAND_HEIGHT
        return Bounds(left, bottom, left + UTM_ZONE_WIDTH, bottom + band_height)

    @cached_property
    def crs(self) -> CRS:
        """UTM CRS of the cell as EPSG code."""
        # 7 for south, 6 for north
        if self.hemisphere == "S":
            hemisphere_digit = "7"
        else:
            hemisphere_digit = "6"
        return CRS.from_string(f"EPSG:32{hemisphere_digit}{self.utm_zone}")

    @cached_property
    def latlon_geometry(self) -> BaseGeometry:
        """Cell bounds as geometry in lat/lon coordinates."""
        return shape(self.latlon_bounds)

    @cached_property
    def hemisphere(self) -> Union[Literal["S"], Literal["N"]]:
        """Return "S" for latitude bands sorting before "N", otherwise "N"."""
        if self.latitude_band < "N":
            return "S"
        return "N"
169
+
170
+
171
@dataclass(frozen=True)
class S2Tile:
    """
    Tile of the grid, addressed by UTM zone, latitude band and grid square.

    The global column/row indexes are optional. If they are not given, they
    are looked up from the grid square name via the tile's MGRSCell.
    """

    utm_zone: str
    latitude_band: str
    grid_square: str
    global_column_index: Optional[int] = None
    global_row_index: Optional[int] = None

    @cached_property
    def crs(self) -> CRS:
        """UTM CRS of the tile as EPSG code."""
        # 7 for south, 6 for north
        hemisphere = "7" if self.latitude_band < "N" else "6"
        return CRS.from_string(f"EPSG:32{hemisphere}{self.utm_zone}")

    @cached_property
    def bounds(self) -> Bounds:
        """
        Tile bounds in the tile CRS.

        The square is extended by TILE_OVERLAP_M at the bottom and at the right.
        """
        # vertical offsets are based on the tile height, horizontal ones on the width
        base_bottom = UTM_TILE_SOURCE_BOTTOM + self.square_row * TILE_HEIGHT_M
        left = UTM_TILE_SOURCE_LEFT + self.square_column * TILE_WIDTH_M
        bottom = base_bottom - TILE_OVERLAP_M
        right = left + TILE_WIDTH_M + TILE_OVERLAP_M
        top = base_bottom + TILE_HEIGHT_M
        return Bounds(left, bottom, right, top)

    @cached_property
    def __geo_interface__(self) -> dict:
        """GeoJSON-like mapping of the tile bounds (in the tile CRS)."""
        return mapping(box(*self.bounds))

    @cached_property
    def mgrs_cell(self) -> MGRSCell:
        """Cell (UTM zone and latitude band) this tile is part of."""
        return MGRSCell(self.utm_zone, self.latitude_band)

    @cached_property
    def latlon_geometry(self) -> BaseGeometry:
        """Tile geometry transformed to lat/lon and repaired at the antimeridian."""
        # return repair_antimeridian_geometry(shape(self.latlon_bounds))
        return repair_antimeridian_geometry(transform_to_latlon(shape(self), self.crs))

    @cached_property
    def latlon_bounds(self) -> Bounds:
        """Bounds of the lat/lon tile geometry."""
        return Bounds.from_inp(self.latlon_geometry)

    @cached_property
    def tile_id(self) -> str:
        """Concatenation of UTM zone, latitude band and grid square."""
        return f"{self.utm_zone}{self.latitude_band}{self.grid_square}"

    @cached_property
    def square_column(self) -> int:
        """Column index of the square within its UTM zone."""
        if self.global_column_index is None:
            return self._global_square_idx[0] % COLUMNS_PER_ZONE
        return self.global_column_index % COLUMNS_PER_ZONE

    @cached_property
    def square_row(self) -> int:
        """Row index of the square."""
        if self.global_row_index is None:
            return self._global_square_idx[1]
        return self.global_row_index

    @cached_property
    def _global_square_idx(self) -> Tuple[int, int]:
        """
        Square index based on bottom-left corner of global AOI.

        Raises:
            InvalidMGRSSquare: if the cell has no square with this tile's name.
        """
        cell = self.mgrs_cell
        for column_index, row_index in cell._global_square_indexes:
            if (
                cell._global_square_index_to_grid_square(column_index, row_index)
                == self.grid_square
            ):
                return (column_index, row_index)
        raise InvalidMGRSSquare(  # pragma: no cover
            f"global square index could not be determined for {self.utm_zone}{self.latitude_band}{self.grid_square}"
        )

    @cached_property
    def hemisphere(self) -> Union[Literal["S"], Literal["N"]]:
        """Return "S" for latitude bands sorting before "N", otherwise "N"."""
        return "S" if self.latitude_band < "N" else "N"

    @staticmethod
    def from_tile_id(tile_id: str) -> S2Tile:
        """
        Create a tile from a tile ID such as "32TNT" or "T32TNT".

        Raises:
            ValueError: if the first two characters are not a number.
        """
        tile_id = tile_id.lstrip("T")
        utm_zone = tile_id[:2]
        latitude_band = tile_id[2]
        grid_square = tile_id[3:]
        try:
            int(utm_zone)
        except ValueError as exc:  # pragma: no cover
            raise ValueError(f"invalid UTM zone given: {utm_zone}") from exc

        return MGRSCell(utm_zone, latitude_band).tile(grid_square)

    @staticmethod
    def from_grid_code(grid_code: str) -> S2Tile:
        """Create a tile from a grid code such as "MGRS-32TNT"."""
        # remove the literal prefix only; str.lstrip() would treat "MGRS-" as a
        # set of characters and could also eat the start of the tile ID
        prefix = "MGRS-"
        if grid_code.startswith(prefix):
            grid_code = grid_code[len(prefix) :]
        return S2Tile.from_tile_id(grid_code)
265
+
266
+
267
def s2_tiles_from_bounds(
    left: float, bottom: float, right: float, top: float
) -> List[S2Tile]:
    """
    Return all tiles intersecting with the given lat/lon bounds.

    The cells covering the bounds plus their direct neighbors are visited, so
    tiles overlapping from adjacent cells are found as well. Every UTM zone is
    visited only once, also if the bounds span the whole longitude range.
    """
    bounds = Bounds(left, bottom, right, top, crs="EPSG:4326")

    # determine zones in eastern-western direction
    min_zone_idx = math.floor((left + LATLON_WIDTH_OFFSET) / UTM_ZONE_WIDTH)
    max_zone_idx = math.floor((right + LATLON_WIDTH_OFFSET) / UTM_ZONE_WIDTH)

    min_latitude_band_idx = math.floor(
        (bottom + LATLON_HEIGHT_OFFSET) / LATITUDE_BAND_HEIGHT
    )
    max_latitude_band_idx = min(
        math.floor((top + LATLON_HEIGHT_OFFSET) / LATITUDE_BAND_HEIGHT),
        len(LATITUDE_BANDS),
    )

    # in order to also get overlapping tiles from other UTM cells, we also
    # query the neighbors:
    min_zone_idx -= 1
    max_zone_idx += 1
    min_latitude_band_idx -= 1
    max_latitude_band_idx += 1

    # wrap zone indexes around the antimeridian and drop repeated zones while
    # keeping the visiting order; without this, very wide bounds would visit
    # some zones twice and yield duplicate tiles
    zone_indexes = list(
        dict.fromkeys(
            zone_idx % len(UTM_ZONES)
            for zone_idx in range(min_zone_idx, max_zone_idx + 1)
        )
    )
    # clamp latitude index to range of 0 and number of latitude bands
    latitude_band_indexes = range(
        max(min_latitude_band_idx, 0),
        min(max_latitude_band_idx + 1, len(LATITUDE_BANDS)),
    )

    aoi = bounds.latlon_geometry()
    prepare(aoi)

    def tiles_generator():
        for utm_zone_idx in zone_indexes:
            for latitude_band_idx in latitude_band_indexes:
                cell = MGRSCell(
                    utm_zone=UTM_ZONES[utm_zone_idx],
                    latitude_band=LATITUDE_BANDS[latitude_band_idx],
                )
                for tile in cell.tiles():
                    # bounds check seems to be faster
                    # if aoi.intersects(box(*tile.latlon_bounds)):
                    if aoi.intersects(tile.latlon_geometry):
                        yield tile

    return list(tiles_generator())
@@ -0,0 +1,278 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from datetime import datetime
5
+ from functools import cached_property
6
+ from typing import Any, Dict, Generator, Iterator, List, Optional, Union
7
+
8
+ from mapchete import Timer
9
+ from mapchete.tile import BufferedTilePyramid
10
+ from mapchete.types import Bounds, BoundsLike
11
+ from pystac import Item
12
+ from pystac_client import Client, CollectionClient, ItemSearch
13
+ from shapely.geometry import shape, box
14
+ from shapely.geometry.base import BaseGeometry
15
+
16
+ from mapchete_eo.search.base import CollectionSearcher, StaticCollectionWriterMixin
17
+ from mapchete_eo.search.config import StacSearchConfig, patch_invalid_assets
18
+ from mapchete_eo.types import TimeRange
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class STACSearchCollection(StaticCollectionWriterMixin, CollectionSearcher):
    """
    Search implementation for STAC APIs.
    """

    collection: str
    config_cls = StacSearchConfig

    @cached_property
    def client(self) -> CollectionClient:
        """Collection client opened from the collection path."""
        return CollectionClient.from_file(self.collection)

    @cached_property
    def eo_bands(self) -> List[str]:
        """
        Return ["eo:bands"] if any data asset of the collection defines
        "eo:bands", otherwise an empty list.

        NOTE(review): the literal key is returned, not the band definitions
        themselves -- confirm this is what callers expect.
        """
        item_assets = self.client.extra_fields.get("item_assets", {})
        for v in item_assets.values():
            if "eo:bands" in v and "data" in v.get("roles", []):
                return ["eo:bands"]
        logger.debug("cannot find eo:bands definition from collections")  # pragma: no cover
        return []

    def search(
        self,
        time: Optional[Union[TimeRange, List[TimeRange]]] = None,
        bounds: Optional[BoundsLike] = None,
        area: Optional[BaseGeometry] = None,
        query: Optional[str] = None,
        search_kwargs: Optional[Dict[str, Any]] = None,
    ) -> Generator[Item, None, None]:
        """
        Yield all items matching the given temporal and spatial filters.

        One search is made per time range. If a search matches more items than
        config.catalog_chunk_threshold, it is repeated in spatial chunks.
        Items whose self href is blacklisted are skipped.

        Args:
            time: single time range or list of time ranges; no temporal filter
                is applied if omitted.
            bounds: spatial filter as bounds.
            area: spatial filter as geometry.
            query: query expression passed on to the STAC API.
            search_kwargs: values for the search configuration (config_cls).

        Raises:
            ValueError: if neither bounds nor area are given.
        """
        config = self.config_cls(**search_kwargs or {})
        if bounds:
            bounds = Bounds.from_inp(bounds)
        if area is None and bounds is None:  # pragma: no cover
            raise ValueError("either bounds or area have to be given")

        if area is not None and area.is_empty:  # pragma: no cover
            return

        def _searches() -> Generator[ItemSearch, None, None]:
            def _search_chunks(
                time_range: Optional[TimeRange] = None,
                bounds: Optional[BoundsLike] = None,
                area: Optional[BaseGeometry] = None,
                query: Optional[str] = None,
            ):
                search = self._search(
                    time_range=time_range,
                    bounds=bounds,
                    area=box(*area.bounds) if area else None,
                    query=query,
                    config=config,
                )
                # matched() may query the API each time it is called, so ask
                # only once and reuse the answer
                reported_matches = search.matched()
                logger.debug("found %s products", reported_matches)
                matched = reported_matches or 0
                if matched > config.catalog_chunk_threshold:  # pragma: no cover
                    spatial_search_chunks = SpatialSearchChunks(
                        bounds=bounds,
                        area=area,
                        grid="geodetic",
                        zoom=config.catalog_chunk_zoom,
                    )
                    logger.debug(
                        "too many products (%s), query catalog in %s chunks",
                        matched,
                        len(spatial_search_chunks),
                    )
                    for counter, chunk_kwargs in enumerate(spatial_search_chunks, 1):
                        # the timer also covers the time the consumer spends on
                        # the yielded search
                        with Timer() as duration:
                            chunk_search = self._search(
                                time_range=time_range,
                                query=query,
                                config=config,
                                **chunk_kwargs,
                            )
                            yield chunk_search
                        # only ask for the match count if it gets logged
                        if logger.isEnabledFor(logging.DEBUG):
                            logger.debug(
                                "returned chunk %s/%s (%s items) in %s",
                                counter,
                                len(spatial_search_chunks),
                                chunk_search.matched(),
                                duration,
                            )
                else:
                    yield search

            if time:
                # search time range(s)
                for time_range in time if isinstance(time, list) else [time]:
                    yield from _search_chunks(
                        time_range=time_range,
                        bounds=bounds,
                        area=area,
                        query=query,
                    )
            else:
                # don't apply temporal filter
                yield from _search_chunks(
                    bounds=bounds,
                    area=area,
                    query=query,
                )

        with patch_invalid_assets():
            for search in _searches():
                for item in search.items():
                    item_href = item.get_self_href()
                    if item_href in self.blacklist:  # pragma: no cover
                        logger.debug(
                            "item %s found in blacklist and skipping",
                            item_href,
                        )
                        continue
                    yield item

    @cached_property
    def default_search_params(self):
        """Search parameters every query starts from."""
        return {
            "collections": [self.client],
            "bbox": None,
            "intersects": None,
        }

    @cached_property
    def search_client(self) -> Client:
        """Client of the catalog root used to run the searches."""
        # looks weird, right?
        #
        # one would assume that directly returning self.client.get_root() would
        # do the same but if we do so, it seems to ignore the "collections" parameter
        # and thus query all collection available on that search endpoint.
        #
        # the only way to fix this, is to instantiate Client from scratch.
        return Client.from_file(self.client.get_root().self_href)

    def _search(
        self,
        time_range: Optional[TimeRange] = None,
        bounds: Optional[Bounds] = None,
        area: Optional[BaseGeometry] = None,
        query: Optional[str] = None,
        config: Optional[StacSearchConfig] = None,
        **kwargs,
    ) -> ItemSearch:
        """
        Build and start a single item search.

        The spatial filter is taken from bounds (preferred), area or from a
        "bbox"/"intersects" entry in kwargs.

        Args:
            time_range: temporal filter; datetimes are reduced to dates.
            bounds: spatial filter passed as "bbox".
            area: spatial filter passed as "intersects".
            query: query expression passed on to the STAC API.
            config: search configuration; defaults to a new StacSearchConfig.
            kwargs: additional search parameters.

        Raises:
            ValueError: if bounds or area are empty or no spatial filter is given.
        """
        # create the default per call instead of sharing one instance which
        # was created at definition time
        if config is None:
            config = StacSearchConfig()

        if bounds is not None:
            if shape(bounds).is_empty:  # pragma: no cover
                raise ValueError("bounds empty")
            kwargs.update(bbox=",".join(map(str, bounds)))
        elif area is not None:
            if area.is_empty:  # pragma: no cover
                raise ValueError("area empty")
            kwargs.update(intersects=area)

        temporal_params = {}
        if time_range:
            start = (
                time_range.start.date()
                if isinstance(time_range.start, datetime)
                else time_range.start
            )
            end = (
                time_range.end.date()
                if isinstance(time_range.end, datetime)
                else time_range.end
            )
            temporal_params["datetime"] = f"{start}/{end}"

        search_params = dict(
            self.default_search_params,
            **temporal_params,
            query=[query] if query else None,
            **kwargs,
        )
        if (
            bounds is None
            and area is None
            and kwargs.get("bbox", kwargs.get("intersects")) is None
        ):  # pragma: no cover
            raise ValueError("no bounds or area given")
        logger.debug("query catalog using params: %s", search_params)
        with Timer() as duration:
            result = self.search_client.search(
                **search_params, limit=config.catalog_pagesize
            )
        logger.debug("query took %s", str(duration))
        return result
210
+
211
+
212
class SpatialSearchChunks:
    """
    Split spatial search areas into smaller chunks for large queries.

    Iterating yields one dictionary per chunk which can be passed on as
    keyword arguments to a search: {"bbox": [...]} if bounds were given,
    {"intersects": geometry} if an area was given.
    """

    # exactly one of bounds and area is set, the other one is None
    bounds: Optional[Bounds]
    area: Optional[BaseGeometry]
    # name of the search keyword the chunks are passed on with
    search_kw: str
    tile_pyramid: BufferedTilePyramid
    zoom: int

    def __init__(
        self,
        bounds: Optional[BoundsLike] = None,
        area: Optional[BaseGeometry] = None,
        zoom: int = 6,
        grid: str = "geodetic",
    ):
        """
        Args:
            bounds: search bounds; takes precedence over area.
            area: search geometry, only used if no bounds are given.
            zoom: zoom level of the tiles used as chunks.
            grid: name of the tile pyramid grid.

        Raises:
            ValueError: if neither bounds nor area are given.
        """
        if bounds is not None:
            self.bounds = Bounds.from_inp(bounds)
            self.area = None
            self.search_kw = "bbox"
        elif area is not None:
            self.bounds = None
            self.area = area
            self.search_kw = "intersects"
        else:  # pragma: no cover
            raise ValueError("either area or bounds have to be given")
        self.zoom = zoom
        self.tile_pyramid = BufferedTilePyramid(grid)

    @cached_property
    def _chunks(self) -> List[Union[List[float], BaseGeometry]]:
        """Search area clipped to every intersecting tile of the pyramid."""
        if self.bounds is not None:
            bounds = self.bounds
            # if bounds cross the antimeridian, snap them to CRS bounds
            if bounds.left < self.tile_pyramid.left:
                logger.warning("snap left bounds value back to CRS bounds")
                bounds = Bounds(
                    self.tile_pyramid.left,
                    bounds.bottom,
                    bounds.right,
                    bounds.top,
                )
            if bounds.right > self.tile_pyramid.right:
                logger.warning("snap right bounds value back to CRS bounds")
                # build on the possibly already snapped bounds, otherwise the
                # left snap from above would be lost
                bounds = Bounds(
                    bounds.left,
                    bounds.bottom,
                    self.tile_pyramid.right,
                    bounds.top,
                )
            search_box = shape(bounds)
            bbox_chunks = []
            for tile in self.tile_pyramid.tiles_from_bounds(bounds, zoom=self.zoom):
                clipped = tile.bbox.intersection(search_box)
                # an empty intersection has no bounds to search for
                if clipped.is_empty:
                    continue
                bbox_chunks.append(list(Bounds.from_inp(clipped)))
            return bbox_chunks

        area_chunks = []
        for tile in self.tile_pyramid.tiles_from_geom(self.area, zoom=self.zoom):
            clipped = tile.bbox.intersection(self.area)
            # an empty geometry cannot be used as a search filter
            if clipped.is_empty:
                continue
            area_chunks.append(clipped)
        return area_chunks

    def __len__(self) -> int:
        """Number of chunks."""
        return len(self._chunks)

    def __iter__(self) -> Iterator[dict]:
        """Iterate over the chunks as search keyword arguments."""
        return iter([{self.search_kw: chunk} for chunk in self._chunks])