rslearn 0.0.26__py3-none-any.whl → 0.0.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rslearn/data_sources/__init__.py +2 -0
- rslearn/data_sources/aws_landsat.py +44 -161
- rslearn/data_sources/aws_open_data.py +2 -4
- rslearn/data_sources/aws_sentinel1.py +1 -3
- rslearn/data_sources/aws_sentinel2_element84.py +54 -165
- rslearn/data_sources/climate_data_store.py +1 -3
- rslearn/data_sources/copernicus.py +1 -2
- rslearn/data_sources/data_source.py +1 -1
- rslearn/data_sources/direct_materialize_data_source.py +336 -0
- rslearn/data_sources/earthdaily.py +52 -155
- rslearn/data_sources/earthdatahub.py +425 -0
- rslearn/data_sources/eurocrops.py +1 -2
- rslearn/data_sources/gcp_public_data.py +1 -2
- rslearn/data_sources/google_earth_engine.py +1 -2
- rslearn/data_sources/hf_srtm.py +595 -0
- rslearn/data_sources/local_files.py +1 -1
- rslearn/data_sources/openstreetmap.py +1 -1
- rslearn/data_sources/planet.py +1 -2
- rslearn/data_sources/planet_basemap.py +1 -2
- rslearn/data_sources/planetary_computer.py +183 -186
- rslearn/data_sources/soilgrids.py +3 -3
- rslearn/data_sources/stac.py +1 -2
- rslearn/data_sources/usda_cdl.py +1 -3
- rslearn/data_sources/usgs_landsat.py +7 -254
- rslearn/data_sources/worldcereal.py +1 -1
- rslearn/data_sources/worldcover.py +1 -1
- rslearn/data_sources/worldpop.py +1 -1
- rslearn/data_sources/xyz_tiles.py +5 -9
- rslearn/models/concatenate_features.py +6 -1
- rslearn/train/{all_patches_dataset.py → all_crops_dataset.py} +120 -117
- rslearn/train/data_module.py +27 -27
- rslearn/train/dataset.py +109 -62
- rslearn/train/lightning_module.py +1 -1
- rslearn/train/model_context.py +3 -3
- rslearn/train/prediction_writer.py +69 -41
- rslearn/train/tasks/classification.py +1 -1
- rslearn/train/tasks/detection.py +5 -5
- rslearn/train/tasks/regression.py +1 -1
- rslearn/utils/__init__.py +2 -0
- rslearn/utils/geometry.py +21 -0
- rslearn/utils/m2m_api.py +251 -0
- rslearn/utils/retry_session.py +43 -0
- {rslearn-0.0.26.dist-info → rslearn-0.0.27.dist-info}/METADATA +6 -3
- {rslearn-0.0.26.dist-info → rslearn-0.0.27.dist-info}/RECORD +49 -45
- rslearn/data_sources/earthdata_srtm.py +0 -282
- {rslearn-0.0.26.dist-info → rslearn-0.0.27.dist-info}/WHEEL +0 -0
- {rslearn-0.0.26.dist-info → rslearn-0.0.27.dist-info}/entry_points.txt +0 -0
- {rslearn-0.0.26.dist-info → rslearn-0.0.27.dist-info}/licenses/LICENSE +0 -0
- {rslearn-0.0.26.dist-info → rslearn-0.0.27.dist-info}/licenses/NOTICE +0 -0
- {rslearn-0.0.26.dist-info → rslearn-0.0.27.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,336 @@
|
|
|
1
|
+
"""Base class for data sources that support direct materialization via TileStore."""
|
|
2
|
+
|
|
3
|
+
from abc import abstractmethod
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from typing import Any, Generic
|
|
6
|
+
|
|
7
|
+
import affine
|
|
8
|
+
import numpy.typing as npt
|
|
9
|
+
import rasterio
|
|
10
|
+
import rasterio.vrt
|
|
11
|
+
from rasterio.enums import Resampling
|
|
12
|
+
|
|
13
|
+
from rslearn.config import LayerConfig
|
|
14
|
+
from rslearn.data_sources.data_source import DataSource, ItemType
|
|
15
|
+
from rslearn.dataset import Window
|
|
16
|
+
from rslearn.dataset.materialize import RasterMaterializer
|
|
17
|
+
from rslearn.tile_stores import TileStore, TileStoreWithLayer
|
|
18
|
+
from rslearn.utils.geometry import PixelBounds, Projection
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DirectMaterializeDataSource(DataSource[ItemType], TileStore, Generic[ItemType]):
|
|
22
|
+
"""Base class for data sources that support direct materialization via TileStore.
|
|
23
|
+
|
|
24
|
+
This class provides common TileStore functionality for data sources that can read
|
|
25
|
+
raster data on-demand from remote sources (like cloud buckets or APIs) without
|
|
26
|
+
first ingesting into a local tile store.
|
|
27
|
+
|
|
28
|
+
Subclasses must implement:
|
|
29
|
+
- get_asset_url(): Get the URL for an asset given item name and asset key
|
|
30
|
+
- get_item_by_name(): Get an item by its name
|
|
31
|
+
|
|
32
|
+
Subclasses may optionally override:
|
|
33
|
+
- get_raster_bands(): By default, we assume that items have all assets. If
|
|
34
|
+
items may have a subset of assets, override get_raster_bands to return
|
|
35
|
+
the sets of bands available for that item.
|
|
36
|
+
- get_read_callback(): Returns a callback to transform the raster array,
|
|
37
|
+
for post-processing like Sentinel-2 harmonization.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
def __init__(self, asset_bands: dict[str, list[str]]):
|
|
41
|
+
"""Initialize the DirectMaterializeDataSource.
|
|
42
|
+
|
|
43
|
+
Args:
|
|
44
|
+
asset_bands: mapping from asset key to the list of band names in that asset.
|
|
45
|
+
"""
|
|
46
|
+
self.asset_bands = asset_bands
|
|
47
|
+
|
|
48
|
+
def _get_asset_key_by_bands(self, bands: list[str]) -> str:
|
|
49
|
+
"""Get the asset key based on the band names.
|
|
50
|
+
|
|
51
|
+
Args:
|
|
52
|
+
bands: list of band names to look up.
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
the asset key that provides those bands.
|
|
56
|
+
|
|
57
|
+
Raises:
|
|
58
|
+
ValueError: if no asset provides those bands.
|
|
59
|
+
"""
|
|
60
|
+
for asset_key, asset_bands in self.asset_bands.items():
|
|
61
|
+
if bands == asset_bands:
|
|
62
|
+
return asset_key
|
|
63
|
+
raise ValueError(f"no known asset with bands {bands}")
|
|
64
|
+
|
|
65
|
+
# --- Methods that subclasses must implement ---
|
|
66
|
+
|
|
67
|
+
@abstractmethod
def get_asset_url(self, item_name: str, asset_key: str) -> str:
    """Resolve the location of one asset of one item.

    This is an abstract hook: the base implementation only raises, and
    subclasses supply the actual lookup. read_raster passes the returned value
    to rasterio, so it must be something rasterio can open.

    Args:
        item_name: name of the item that owns the asset.
        asset_key: key selecting which of the item's assets is wanted.

    Returns:
        the URL from which the asset can be read.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError
|
|
79
|
+
|
|
80
|
+
def get_item_by_name(self, name: str) -> ItemType:
    """Look up an item from its name.

    The base implementation only raises. A concrete data source has to provide
    this method itself or pick it up from another class it inherits from
    (e.g., StacDataSource).

    Args:
        name: name of the item to fetch.

    Returns:
        the matching item object.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError
|
|
93
|
+
|
|
94
|
+
# --- Optional hooks for subclasses ---
|
|
95
|
+
|
|
96
|
+
def get_read_callback(
    self, item_name: str, asset_key: str
) -> Callable[[npt.NDArray[Any]], npt.NDArray[Any]] | None:
    """Provide an optional post-processing function for freshly read rasters.

    read_raster calls this after reading an asset and, when the result is not
    None, replaces the array with the callback's return value. The default is
    to do no post-processing; subclasses may override it, for example to
    harmonize Sentinel-2 data.

    Args:
        item_name: name of the item being read.
        asset_key: key of the asset being read.

    Returns:
        a function mapping the raw array to the processed array, or None when
        the data should be returned untouched (the default).
    """
    return None
|
|
113
|
+
|
|
114
|
+
# --- TileStore implementation ---
|
|
115
|
+
|
|
116
|
+
def is_raster_ready(
    self, layer_name: str, item_name: str, bands: list[str]
) -> bool:
    """Report whether the raster can be read from this store.

    Nothing is ever written into this store; rasters are fetched from the
    remote source when they are read, so every raster is reported as ready and
    none of the arguments are inspected.

    Args:
        layer_name: the layer name or alias.
        item_name: the item.
        bands: the bands identifying which raster of the item is meant.

    Returns:
        always True.
    """
    return True
|
|
133
|
+
|
|
134
|
+
def get_raster_bands(self, layer_name: str, item_name: str) -> list[list[str]]:
    """List the band sets that can be read for an item.

    This default ignores both arguments and reports every band list in
    self.asset_bands, i.e. it treats each item as having all configured
    assets. Subclasses whose items may lack some assets can override it.

    Args:
        layer_name: the layer name or alias.
        item_name: the item.

    Returns:
        a new list containing the band list of each configured asset.
    """
    return [*self.asset_bands.values()]
|
|
148
|
+
|
|
149
|
+
def get_raster_bounds(
    self, layer_name: str, item_name: str, bands: list[str], projection: Projection
) -> PixelBounds:
    """Get the bounds of the raster in the specified projection.

    The bounds are derived from the item's geometry re-projected into
    ``projection``; ``layer_name`` and ``bands`` are not consulted.

    Args:
        layer_name: the layer name or alias.
        item_name: the item to check.
        bands: the list of bands identifying which specific raster to read.
        projection: the projection to get the raster's bounds in.

    Returns:
        integer (min_x, min_y, max_x, max_y) bounds that fully enclose the
        item's geometry in the projection.
    """
    # Imported locally so this method does not depend on the module header.
    import math

    item = self.get_item_by_name(item_name)
    geom = item.geometry.to_projection(projection)
    min_x, min_y, max_x, max_y = geom.shp.bounds
    # int() truncates toward zero, which shrinks the minimum edge for negative
    # coordinates and drops a partially covered pixel on the maximum edge for
    # positive ones. Round outward so the integer bounds always cover the
    # geometry.
    return (
        math.floor(min_x),
        math.floor(min_y),
        math.ceil(max_x),
        math.ceil(max_y),
    )
|
|
171
|
+
|
|
172
|
+
def _read_raster_from_url(
    self,
    url: str,
    projection: Projection,
    bounds: PixelBounds,
    resampling: Resampling,
) -> npt.NDArray[Any]:
    """Open a raster at a URL and read it warped onto the requested pixel grid.

    Shared helper for read_raster: the source is opened with rasterio and
    wrapped in a WarpedVRT configured with the target CRS, transform and size,
    so the read returns data already aligned to ``projection`` and ``bounds``.

    Args:
        url: location of the raster (must be readable by rasterio).
        projection: the projection to read in.
        bounds: the pixel bounds to read, as (min_x, min_y, max_x, max_y).
        resampling: resampling method passed to the WarpedVRT.

    Returns:
        the array returned by reading the warped dataset.
    """
    col_start, row_start, col_end, row_end = bounds

    # The scale terms are the projection's resolutions and the offsets are the
    # top-left pixel indices scaled by those resolutions, so output pixel
    # (0, 0) lines up with pixel (col_start, row_start) of the target grid.
    target_transform = affine.Affine(
        projection.x_resolution,
        0,
        col_start * projection.x_resolution,
        0,
        projection.y_resolution,
        row_start * projection.y_resolution,
    )

    with rasterio.open(url) as dataset, rasterio.vrt.WarpedVRT(
        dataset,
        crs=projection.crs,
        transform=target_transform,
        width=col_end - col_start,
        height=row_end - row_start,
        resampling=resampling,
    ) as warped:
        return warped.read()
|
|
213
|
+
|
|
214
|
+
def read_raster(
    self,
    layer_name: str,
    item_name: str,
    bands: list[str],
    projection: Projection,
    bounds: PixelBounds,
    resampling: Resampling = Resampling.bilinear,
) -> npt.NDArray[Any]:
    """Read one raster of an item directly from its remote asset.

    The bands are mapped to an asset key, the asset's URL is obtained from
    get_asset_url, the data is read and re-projected, and finally the optional
    callback from get_read_callback is applied. ``layer_name`` is not used.

    Args:
        layer_name: the layer name or alias.
        item_name: the item to read.
        bands: the list of bands identifying which specific raster to read.
        projection: the projection to read in.
        bounds: the bounds to read.
        resampling: the resampling method to use in case reprojection is needed.

    Returns:
        the raster data as a numpy array, after any post-processing callback.

    Raises:
        ValueError: if no configured asset provides exactly ``bands``.
    """
    # Which asset holds these bands, and where does it live?
    asset_key = self._get_asset_key_by_bands(bands)
    source_url = self.get_asset_url(item_name, asset_key)

    array = self._read_raster_from_url(source_url, projection, bounds, resampling)

    # Subclasses may supply a transformation to run on the raw array.
    post_process = self.get_read_callback(item_name, asset_key)
    if post_process is None:
        return array
    return post_process(array)
|
|
251
|
+
|
|
252
|
+
def materialize(
    self,
    window: Window,
    item_groups: list[list[ItemType]],
    layer_name: str,
    layer_cfg: LayerConfig,
) -> None:
    """Materialize this layer's data for a window.

    The work is delegated to RasterMaterializer, with this data source itself
    (wrapped in TileStoreWithLayer for ``layer_name``) acting as the tile store.

    Args:
        window: the window to materialize.
        item_groups: the items from get_items.
        layer_name: the name of this layer.
        layer_cfg: the config of this layer.
    """
    materializer = RasterMaterializer()
    layer_store = TileStoreWithLayer(self, layer_name)
    materializer.materialize(layer_store, window, layer_name, layer_cfg, item_groups)
|
|
274
|
+
|
|
275
|
+
# --- TileStore methods that are not supported ---
|
|
276
|
+
|
|
277
|
+
def write_raster(
    self,
    layer_name: str,
    item_name: str,
    bands: list[str],
    projection: Projection,
    bounds: PixelBounds,
    array: npt.NDArray[Any],
) -> None:
    """Reject an attempt to write a raster array.

    Writing is unsupported here; the arguments are ignored.

    Raises:
        NotImplementedError: always.
    """
    message = "DirectMaterializeDataSource does not support writing raster data"
    raise NotImplementedError(message)
|
|
293
|
+
|
|
294
|
+
def write_raster_file(
    self, layer_name: str, item_name: str, bands: list[str], fname: Any
) -> None:
    """Reject an attempt to write a raster from a file.

    Writing is unsupported here; the arguments are ignored.

    Raises:
        NotImplementedError: always.
    """
    message = "DirectMaterializeDataSource does not support writing raster files"
    raise NotImplementedError(message)
|
|
304
|
+
|
|
305
|
+
def is_vector_ready(self, layer_name: str, item_name: str) -> bool:
    """Reject a query about stored vector data.

    Vector operations are unsupported here; the arguments are ignored.

    Raises:
        NotImplementedError: always.
    """
    message = "DirectMaterializeDataSource does not support vector operations"
    raise NotImplementedError(message)
|
|
313
|
+
|
|
314
|
+
def read_vector(
    self,
    layer_name: str,
    item_name: str,
    projection: Projection,
    bounds: PixelBounds,
) -> Any:
    """Reject an attempt to read vector data.

    Vector operations are unsupported here; the arguments are ignored.

    Raises:
        NotImplementedError: always.
    """
    message = "DirectMaterializeDataSource does not support vector operations"
    raise NotImplementedError(message)
|
|
328
|
+
|
|
329
|
+
def write_vector(self, layer_name: str, item_name: str, features: Any) -> None:
    """Reject an attempt to write vector data.

    Vector operations are unsupported here; the arguments are ignored.

    Raises:
        NotImplementedError: always.
    """
    message = "DirectMaterializeDataSource does not support vector operations"
    raise NotImplementedError(message)
|
|
@@ -6,27 +6,24 @@ import tempfile
|
|
|
6
6
|
from datetime import timedelta
|
|
7
7
|
from typing import Any, Literal
|
|
8
8
|
|
|
9
|
-
import affine
|
|
10
|
-
import numpy.typing as npt
|
|
11
9
|
import pystac
|
|
12
10
|
import pystac_client
|
|
13
|
-
import rasterio
|
|
14
11
|
import requests
|
|
15
12
|
import shapely
|
|
16
13
|
from earthdaily import EDSClient, EDSConfig
|
|
17
|
-
from rasterio.enums import Resampling
|
|
18
14
|
from upath import UPath
|
|
19
15
|
|
|
20
|
-
from rslearn.config import
|
|
16
|
+
from rslearn.config import QueryConfig
|
|
21
17
|
from rslearn.const import WGS84_PROJECTION
|
|
22
|
-
from rslearn.data_sources import
|
|
18
|
+
from rslearn.data_sources import DataSourceContext, Item
|
|
19
|
+
from rslearn.data_sources.direct_materialize_data_source import (
|
|
20
|
+
DirectMaterializeDataSource,
|
|
21
|
+
)
|
|
23
22
|
from rslearn.data_sources.utils import match_candidate_items_to_window
|
|
24
|
-
from rslearn.dataset import Window
|
|
25
|
-
from rslearn.dataset.materialize import RasterMaterializer
|
|
26
23
|
from rslearn.log_utils import get_logger
|
|
27
|
-
from rslearn.tile_stores import
|
|
24
|
+
from rslearn.tile_stores import TileStoreWithLayer
|
|
28
25
|
from rslearn.utils.fsspec import join_upath
|
|
29
|
-
from rslearn.utils.geometry import
|
|
26
|
+
from rslearn.utils.geometry import STGeometry
|
|
30
27
|
|
|
31
28
|
logger = get_logger(__name__)
|
|
32
29
|
|
|
@@ -62,7 +59,7 @@ class EarthDailyItem(Item):
|
|
|
62
59
|
)
|
|
63
60
|
|
|
64
61
|
|
|
65
|
-
class EarthDaily(
|
|
62
|
+
class EarthDaily(DirectMaterializeDataSource[EarthDailyItem]):
|
|
66
63
|
"""A data source for EarthDaily data.
|
|
67
64
|
|
|
68
65
|
This requires the following environment variables to be set:
|
|
@@ -111,8 +108,9 @@ class EarthDaily(DataSource, TileStore):
|
|
|
111
108
|
services "legacy" and "internal" are not supported.
|
|
112
109
|
context: the data source context.
|
|
113
110
|
"""
|
|
111
|
+
super().__init__(asset_bands=asset_bands)
|
|
112
|
+
|
|
114
113
|
self.collection_name = collection_name
|
|
115
|
-
self.asset_bands = asset_bands
|
|
116
114
|
self.query = query
|
|
117
115
|
self.sort_by = sort_by
|
|
118
116
|
self.sort_ascending = sort_ascending
|
|
@@ -221,6 +219,47 @@ class EarthDaily(DataSource, TileStore):
|
|
|
221
219
|
|
|
222
220
|
return item
|
|
223
221
|
|
|
222
|
+
# --- DirectMaterializeDataSource implementation ---
|
|
223
|
+
|
|
224
|
+
def get_asset_url(self, item_name: str, asset_key: str) -> str:
    """Return the URL recorded on an item for one of its assets.

    The item is fetched with get_item_by_name and the URL is taken from its
    asset_urls mapping.

    Args:
        item_name: the name of the item.
        asset_key: the key identifying which asset to get.

    Returns:
        the URL to read the asset from.

    Raises:
        KeyError: if asset_urls is a dict and has no entry for ``asset_key``.
    """
    return self.get_item_by_name(item_name).asset_urls[asset_key]
|
|
236
|
+
|
|
237
|
+
def get_raster_bands(self, layer_name: str, item_name: str) -> list[list[str]]:
    """List the band sets available for an item.

    Args:
        layer_name: the layer name or alias (not used).
        item_name: the item.

    Returns:
        the band lists of the configured assets: all of them when
        skip_items_missing_assets is set, otherwise only those whose asset key
        appears in the item's asset_urls.
    """
    if not self.skip_items_missing_assets:
        # Items may lack some assets, so check the item itself. The lookup
        # goes through get_item_by_name, which is expected to handle caching.
        item = self.get_item_by_name(item_name)
        return [
            band_names
            for asset_key, band_names in self.asset_bands.items()
            if asset_key in item.asset_urls
        ]

    # Items missing assets were skipped, so every configured asset is assumed
    # to be present.
    return list(self.asset_bands.values())
|
|
260
|
+
|
|
261
|
+
# --- DataSource implementation ---
|
|
262
|
+
|
|
224
263
|
def get_items(
|
|
225
264
|
self, geometries: list[STGeometry], query_config: QueryConfig
|
|
226
265
|
) -> list[list[list[EarthDailyItem]]]:
|
|
@@ -285,9 +324,8 @@ class EarthDaily(DataSource, TileStore):
|
|
|
285
324
|
|
|
286
325
|
return groups
|
|
287
326
|
|
|
288
|
-
def deserialize_item(self, serialized_item:
|
|
327
|
+
def deserialize_item(self, serialized_item: dict) -> EarthDailyItem:
|
|
289
328
|
"""Deserializes an item from JSON-decoded data."""
|
|
290
|
-
assert isinstance(serialized_item, dict)
|
|
291
329
|
return EarthDailyItem.deserialize(serialized_item)
|
|
292
330
|
|
|
293
331
|
def ingest(
|
|
@@ -341,144 +379,3 @@ class EarthDaily(DataSource, TileStore):
|
|
|
341
379
|
item.name,
|
|
342
380
|
asset_key,
|
|
343
381
|
)
|
|
344
|
-
|
|
345
|
-
def is_raster_ready(
|
|
346
|
-
self, layer_name: str, item_name: str, bands: list[str]
|
|
347
|
-
) -> bool:
|
|
348
|
-
"""Checks if this raster has been written to the store.
|
|
349
|
-
|
|
350
|
-
Args:
|
|
351
|
-
layer_name: the layer name or alias.
|
|
352
|
-
item_name: the item.
|
|
353
|
-
bands: the list of bands identifying which specific raster to read.
|
|
354
|
-
|
|
355
|
-
Returns:
|
|
356
|
-
whether there is a raster in the store matching the source, item, and
|
|
357
|
-
bands.
|
|
358
|
-
"""
|
|
359
|
-
# Always ready since we wrap accesses to EarthDaily.
|
|
360
|
-
return True
|
|
361
|
-
|
|
362
|
-
def get_raster_bands(self, layer_name: str, item_name: str) -> list[list[str]]:
|
|
363
|
-
"""Get the sets of bands that have been stored for the specified item.
|
|
364
|
-
|
|
365
|
-
Args:
|
|
366
|
-
layer_name: the layer name or alias.
|
|
367
|
-
item_name: the item.
|
|
368
|
-
"""
|
|
369
|
-
if self.skip_items_missing_assets:
|
|
370
|
-
# In this case we can assume that the item has all of the assets.
|
|
371
|
-
return list(self.asset_bands.values())
|
|
372
|
-
|
|
373
|
-
# Otherwise we have to lookup the STAC item to see which assets it has.
|
|
374
|
-
# Here we use get_item_by_name since it handles caching.
|
|
375
|
-
item = self.get_item_by_name(item_name)
|
|
376
|
-
all_bands = []
|
|
377
|
-
for asset_key, band_names in self.asset_bands.items():
|
|
378
|
-
if asset_key not in item.asset_urls:
|
|
379
|
-
continue
|
|
380
|
-
all_bands.append(band_names)
|
|
381
|
-
return all_bands
|
|
382
|
-
|
|
383
|
-
def _get_asset_by_band(self, bands: list[str]) -> str:
|
|
384
|
-
"""Get the name of the asset based on the band names."""
|
|
385
|
-
for asset_key, asset_bands in self.asset_bands.items():
|
|
386
|
-
if bands == asset_bands:
|
|
387
|
-
return asset_key
|
|
388
|
-
|
|
389
|
-
raise ValueError(f"no raster with bands {bands}")
|
|
390
|
-
|
|
391
|
-
def get_raster_bounds(
|
|
392
|
-
self, layer_name: str, item_name: str, bands: list[str], projection: Projection
|
|
393
|
-
) -> PixelBounds:
|
|
394
|
-
"""Get the bounds of the raster in the specified projection.
|
|
395
|
-
|
|
396
|
-
Args:
|
|
397
|
-
layer_name: the layer name or alias.
|
|
398
|
-
item_name: the item to check.
|
|
399
|
-
bands: the list of bands identifying which specific raster to read. These
|
|
400
|
-
bands must match the bands of a stored raster.
|
|
401
|
-
projection: the projection to get the raster's bounds in.
|
|
402
|
-
|
|
403
|
-
Returns:
|
|
404
|
-
the bounds of the raster in the projection.
|
|
405
|
-
"""
|
|
406
|
-
item = self.get_item_by_name(item_name)
|
|
407
|
-
geom = item.geometry.to_projection(projection)
|
|
408
|
-
return (
|
|
409
|
-
int(geom.shp.bounds[0]),
|
|
410
|
-
int(geom.shp.bounds[1]),
|
|
411
|
-
int(geom.shp.bounds[2]),
|
|
412
|
-
int(geom.shp.bounds[3]),
|
|
413
|
-
)
|
|
414
|
-
|
|
415
|
-
def read_raster(
|
|
416
|
-
self,
|
|
417
|
-
layer_name: str,
|
|
418
|
-
item_name: str,
|
|
419
|
-
bands: list[str],
|
|
420
|
-
projection: Projection,
|
|
421
|
-
bounds: PixelBounds,
|
|
422
|
-
resampling: Resampling = Resampling.bilinear,
|
|
423
|
-
) -> npt.NDArray[Any]:
|
|
424
|
-
"""Read raster data from the store.
|
|
425
|
-
|
|
426
|
-
Args:
|
|
427
|
-
layer_name: the layer name or alias.
|
|
428
|
-
item_name: the item to read.
|
|
429
|
-
bands: the list of bands identifying which specific raster to read. These
|
|
430
|
-
bands must match the bands of a stored raster.
|
|
431
|
-
projection: the projection to read in.
|
|
432
|
-
bounds: the bounds to read.
|
|
433
|
-
resampling: the resampling method to use in case reprojection is needed.
|
|
434
|
-
|
|
435
|
-
Returns:
|
|
436
|
-
the raster data
|
|
437
|
-
"""
|
|
438
|
-
asset_key = self._get_asset_by_band(bands)
|
|
439
|
-
item = self.get_item_by_name(item_name)
|
|
440
|
-
asset_url = item.asset_urls[asset_key]
|
|
441
|
-
|
|
442
|
-
# Construct the transform to use for the warped dataset.
|
|
443
|
-
wanted_transform = affine.Affine(
|
|
444
|
-
projection.x_resolution,
|
|
445
|
-
0,
|
|
446
|
-
bounds[0] * projection.x_resolution,
|
|
447
|
-
0,
|
|
448
|
-
projection.y_resolution,
|
|
449
|
-
bounds[1] * projection.y_resolution,
|
|
450
|
-
)
|
|
451
|
-
|
|
452
|
-
with rasterio.open(asset_url) as src:
|
|
453
|
-
with rasterio.vrt.WarpedVRT(
|
|
454
|
-
src,
|
|
455
|
-
crs=projection.crs,
|
|
456
|
-
transform=wanted_transform,
|
|
457
|
-
width=bounds[2] - bounds[0],
|
|
458
|
-
height=bounds[3] - bounds[1],
|
|
459
|
-
resampling=resampling,
|
|
460
|
-
) as vrt:
|
|
461
|
-
return vrt.read()
|
|
462
|
-
|
|
463
|
-
def materialize(
|
|
464
|
-
self,
|
|
465
|
-
window: Window,
|
|
466
|
-
item_groups: list[list[Item]],
|
|
467
|
-
layer_name: str,
|
|
468
|
-
layer_cfg: LayerConfig,
|
|
469
|
-
) -> None:
|
|
470
|
-
"""Materialize data for the window.
|
|
471
|
-
|
|
472
|
-
Args:
|
|
473
|
-
window: the window to materialize
|
|
474
|
-
item_groups: the items from get_items
|
|
475
|
-
layer_name: the name of this layer
|
|
476
|
-
layer_cfg: the config of this layer
|
|
477
|
-
"""
|
|
478
|
-
RasterMaterializer().materialize(
|
|
479
|
-
TileStoreWithLayer(self, layer_name),
|
|
480
|
-
window,
|
|
481
|
-
layer_name,
|
|
482
|
-
layer_cfg,
|
|
483
|
-
item_groups,
|
|
484
|
-
)
|