rslearn 0.0.1__py3-none-any.whl → 0.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. rslearn/arg_parser.py +31 -0
  2. rslearn/config/__init__.py +6 -12
  3. rslearn/config/dataset.py +520 -401
  4. rslearn/const.py +9 -15
  5. rslearn/data_sources/__init__.py +8 -23
  6. rslearn/data_sources/aws_landsat.py +242 -98
  7. rslearn/data_sources/aws_open_data.py +111 -151
  8. rslearn/data_sources/aws_sentinel1.py +131 -0
  9. rslearn/data_sources/climate_data_store.py +471 -0
  10. rslearn/data_sources/copernicus.py +884 -12
  11. rslearn/data_sources/data_source.py +43 -12
  12. rslearn/data_sources/earthdaily.py +484 -0
  13. rslearn/data_sources/earthdata_srtm.py +282 -0
  14. rslearn/data_sources/eurocrops.py +242 -0
  15. rslearn/data_sources/gcp_public_data.py +578 -222
  16. rslearn/data_sources/google_earth_engine.py +461 -135
  17. rslearn/data_sources/local_files.py +219 -150
  18. rslearn/data_sources/openstreetmap.py +51 -89
  19. rslearn/data_sources/planet.py +24 -60
  20. rslearn/data_sources/planet_basemap.py +275 -0
  21. rslearn/data_sources/planetary_computer.py +798 -0
  22. rslearn/data_sources/usda_cdl.py +195 -0
  23. rslearn/data_sources/usgs_landsat.py +115 -83
  24. rslearn/data_sources/utils.py +249 -61
  25. rslearn/data_sources/vector_source.py +1 -0
  26. rslearn/data_sources/worldcereal.py +449 -0
  27. rslearn/data_sources/worldcover.py +144 -0
  28. rslearn/data_sources/worldpop.py +153 -0
  29. rslearn/data_sources/xyz_tiles.py +150 -107
  30. rslearn/dataset/__init__.py +8 -2
  31. rslearn/dataset/add_windows.py +2 -2
  32. rslearn/dataset/dataset.py +40 -51
  33. rslearn/dataset/handler_summaries.py +131 -0
  34. rslearn/dataset/manage.py +313 -74
  35. rslearn/dataset/materialize.py +431 -107
  36. rslearn/dataset/remap.py +29 -4
  37. rslearn/dataset/storage/__init__.py +1 -0
  38. rslearn/dataset/storage/file.py +202 -0
  39. rslearn/dataset/storage/storage.py +140 -0
  40. rslearn/dataset/window.py +181 -44
  41. rslearn/lightning_cli.py +454 -0
  42. rslearn/log_utils.py +24 -0
  43. rslearn/main.py +384 -181
  44. rslearn/models/anysat.py +215 -0
  45. rslearn/models/attention_pooling.py +177 -0
  46. rslearn/models/clay/clay.py +231 -0
  47. rslearn/models/clay/configs/metadata.yaml +295 -0
  48. rslearn/models/clip.py +68 -0
  49. rslearn/models/component.py +111 -0
  50. rslearn/models/concatenate_features.py +103 -0
  51. rslearn/models/conv.py +63 -0
  52. rslearn/models/croma.py +306 -0
  53. rslearn/models/detr/__init__.py +5 -0
  54. rslearn/models/detr/box_ops.py +103 -0
  55. rslearn/models/detr/detr.py +504 -0
  56. rslearn/models/detr/matcher.py +107 -0
  57. rslearn/models/detr/position_encoding.py +114 -0
  58. rslearn/models/detr/transformer.py +429 -0
  59. rslearn/models/detr/util.py +24 -0
  60. rslearn/models/dinov3.py +177 -0
  61. rslearn/models/faster_rcnn.py +30 -28
  62. rslearn/models/feature_center_crop.py +53 -0
  63. rslearn/models/fpn.py +19 -8
  64. rslearn/models/galileo/__init__.py +5 -0
  65. rslearn/models/galileo/galileo.py +595 -0
  66. rslearn/models/galileo/single_file_galileo.py +1678 -0
  67. rslearn/models/module_wrapper.py +65 -0
  68. rslearn/models/molmo.py +69 -0
  69. rslearn/models/multitask.py +384 -28
  70. rslearn/models/olmoearth_pretrain/__init__.py +1 -0
  71. rslearn/models/olmoearth_pretrain/model.py +421 -0
  72. rslearn/models/olmoearth_pretrain/norm.py +86 -0
  73. rslearn/models/panopticon.py +170 -0
  74. rslearn/models/panopticon_data/sensors/drone.yaml +32 -0
  75. rslearn/models/panopticon_data/sensors/enmap.yaml +904 -0
  76. rslearn/models/panopticon_data/sensors/goes.yaml +9 -0
  77. rslearn/models/panopticon_data/sensors/himawari.yaml +9 -0
  78. rslearn/models/panopticon_data/sensors/intuition.yaml +606 -0
  79. rslearn/models/panopticon_data/sensors/landsat8.yaml +84 -0
  80. rslearn/models/panopticon_data/sensors/modis_terra.yaml +99 -0
  81. rslearn/models/panopticon_data/sensors/qb2_ge1.yaml +34 -0
  82. rslearn/models/panopticon_data/sensors/sentinel1.yaml +85 -0
  83. rslearn/models/panopticon_data/sensors/sentinel2.yaml +97 -0
  84. rslearn/models/panopticon_data/sensors/superdove.yaml +60 -0
  85. rslearn/models/panopticon_data/sensors/wv23.yaml +63 -0
  86. rslearn/models/pick_features.py +17 -10
  87. rslearn/models/pooling_decoder.py +60 -7
  88. rslearn/models/presto/__init__.py +5 -0
  89. rslearn/models/presto/presto.py +297 -0
  90. rslearn/models/presto/single_file_presto.py +926 -0
  91. rslearn/models/prithvi.py +1147 -0
  92. rslearn/models/resize_features.py +59 -0
  93. rslearn/models/sam2_enc.py +13 -9
  94. rslearn/models/satlaspretrain.py +38 -18
  95. rslearn/models/simple_time_series.py +188 -77
  96. rslearn/models/singletask.py +24 -13
  97. rslearn/models/ssl4eo_s12.py +40 -30
  98. rslearn/models/swin.py +44 -32
  99. rslearn/models/task_embedding.py +250 -0
  100. rslearn/models/terramind.py +256 -0
  101. rslearn/models/trunk.py +139 -0
  102. rslearn/models/unet.py +68 -22
  103. rslearn/models/upsample.py +48 -0
  104. rslearn/models/use_croma.py +508 -0
  105. rslearn/template_params.py +26 -0
  106. rslearn/tile_stores/__init__.py +41 -18
  107. rslearn/tile_stores/default.py +409 -0
  108. rslearn/tile_stores/tile_store.py +236 -132
  109. rslearn/train/all_patches_dataset.py +530 -0
  110. rslearn/train/callbacks/adapters.py +53 -0
  111. rslearn/train/callbacks/freeze_unfreeze.py +348 -17
  112. rslearn/train/callbacks/gradients.py +129 -0
  113. rslearn/train/callbacks/peft.py +116 -0
  114. rslearn/train/data_module.py +444 -20
  115. rslearn/train/dataset.py +588 -235
  116. rslearn/train/lightning_module.py +192 -62
  117. rslearn/train/model_context.py +88 -0
  118. rslearn/train/optimizer.py +31 -0
  119. rslearn/train/prediction_writer.py +319 -84
  120. rslearn/train/scheduler.py +92 -0
  121. rslearn/train/tasks/classification.py +55 -28
  122. rslearn/train/tasks/detection.py +132 -76
  123. rslearn/train/tasks/embedding.py +120 -0
  124. rslearn/train/tasks/multi_task.py +28 -14
  125. rslearn/train/tasks/per_pixel_regression.py +291 -0
  126. rslearn/train/tasks/regression.py +161 -44
  127. rslearn/train/tasks/segmentation.py +428 -53
  128. rslearn/train/tasks/task.py +6 -5
  129. rslearn/train/transforms/__init__.py +1 -1
  130. rslearn/train/transforms/concatenate.py +54 -10
  131. rslearn/train/transforms/crop.py +29 -11
  132. rslearn/train/transforms/flip.py +18 -6
  133. rslearn/train/transforms/mask.py +78 -0
  134. rslearn/train/transforms/normalize.py +101 -17
  135. rslearn/train/transforms/pad.py +19 -7
  136. rslearn/train/transforms/resize.py +83 -0
  137. rslearn/train/transforms/select_bands.py +76 -0
  138. rslearn/train/transforms/sentinel1.py +75 -0
  139. rslearn/train/transforms/transform.py +89 -70
  140. rslearn/utils/__init__.py +2 -6
  141. rslearn/utils/array.py +8 -6
  142. rslearn/utils/feature.py +2 -2
  143. rslearn/utils/fsspec.py +90 -1
  144. rslearn/utils/geometry.py +347 -7
  145. rslearn/utils/get_utm_ups_crs.py +2 -3
  146. rslearn/utils/grid_index.py +5 -5
  147. rslearn/utils/jsonargparse.py +178 -0
  148. rslearn/utils/mp.py +4 -3
  149. rslearn/utils/raster_format.py +268 -116
  150. rslearn/utils/rtree_index.py +64 -17
  151. rslearn/utils/sqlite_index.py +7 -1
  152. rslearn/utils/vector_format.py +252 -97
  153. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/METADATA +532 -283
  154. rslearn-0.0.21.dist-info/RECORD +167 -0
  155. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/WHEEL +1 -1
  156. rslearn-0.0.21.dist-info/licenses/NOTICE +115 -0
  157. rslearn/data_sources/raster_source.py +0 -309
  158. rslearn/models/registry.py +0 -5
  159. rslearn/tile_stores/file.py +0 -242
  160. rslearn/utils/mgrs.py +0 -24
  161. rslearn/utils/utils.py +0 -22
  162. rslearn-0.0.1.dist-info/RECORD +0 -88
  163. /rslearn/{data_sources/geotiff.py → py.typed} +0 -0
  164. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/entry_points.txt +0 -0
  165. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info/licenses}/LICENSE +0 -0
  166. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,471 @@
1
+ """Data source for Copernicus Climate Data Store."""
2
+
3
+ import os
4
+ import tempfile
5
+ from datetime import UTC, datetime
6
+ from typing import Any
7
+
8
+ import cdsapi
9
+ import netCDF4
10
+ import numpy as np
11
+ import rasterio
12
+ import shapely
13
+ from dateutil.relativedelta import relativedelta
14
+ from rasterio.transform import from_origin
15
+ from upath import UPath
16
+
17
+ from rslearn.config import QueryConfig, SpaceMode
18
+ from rslearn.const import WGS84_EPSG, WGS84_PROJECTION
19
+ from rslearn.data_sources import DataSource, DataSourceContext, Item
20
+ from rslearn.log_utils import get_logger
21
+ from rslearn.tile_stores import TileStoreWithLayer
22
+ from rslearn.utils.geometry import STGeometry
23
+
24
+ logger = get_logger(__name__)
25
+
26
+
27
class ERA5Land(DataSource):
    """Base class for ingesting ERA5 land data from the Copernicus Climate Data Store.

    An API key must be passed either in the configuration or via the CDSAPI_KEY
    environment variable. You can acquire an API key by going to the Climate Data Store
    website (https://cds.climate.copernicus.eu/), registering an account and logging
    in, and then getting the API key from the user profile page.

    The band names should match CDS variable names (see the reference at
    https://confluence.ecmwf.int/display/CKB/ERA5-Land%3A+data+documentation). However,
    replace "_" with "-" in the variable names when specifying bands in the layer
    configuration.

    By default, all requests to the API will be for the whole globe. To speed up ingestion,
    we recommend specifying the bounds of the area of interest, in particular for hourly data.
    """

    api_url = "https://cds.climate.copernicus.eu/api"
    DATA_FORMAT = "netcdf"
    DOWNLOAD_FORMAT = "unarchived"
    PIXEL_SIZE = 0.1  # degrees, native resolution is 9km

    def __init__(
        self,
        dataset: str,
        product_type: str,
        band_names: list[str] | None = None,
        api_key: str | None = None,
        bounds: list[float] | None = None,
        context: DataSourceContext = DataSourceContext(),
    ):
        """Initialize a new ERA5Land instance.

        Args:
            dataset: the CDS dataset name (e.g., "reanalysis-era5-land-monthly-means").
            product_type: the CDS product type (e.g., "monthly_averaged_reanalysis").
            band_names: list of band names to acquire. These should correspond to CDS
                variable names but with "_" replaced with "-". This will only be used
                if the layer config is missing from the context.
            api_key: the API key. If not set, it should be set via the CDSAPI_KEY
                environment variable.
            bounds: optional bounding box as [min_lon, min_lat, max_lon, max_lat].
                If not specified, the whole globe will be used.
            context: the data source context.

        Raises:
            ValueError: if neither band_names nor a layer config in the context is
                provided.
        """
        self.dataset = dataset
        self.product_type = product_type
        self.bounds = bounds

        self.band_names: list[str]
        if context.layer_config is not None:
            # Derive the band list from the layer config, de-duplicating while
            # preserving the order in which bands first appear.
            self.band_names = []
            for band_set in context.layer_config.band_sets:
                for band in band_set.bands:
                    if band in self.band_names:
                        continue
                    self.band_names.append(band)
        elif band_names is not None:
            self.band_names = band_names
        else:
            raise ValueError(
                "band_names must be set if layer_config is not in the context"
            )

        self.client = cdsapi.Client(
            url=self.api_url,
            key=api_key,
        )

    def get_items(
        self, geometries: list[STGeometry], query_config: QueryConfig
    ) -> list[list[list[Item]]]:
        """Get a list of items in the data source intersecting the given geometries.

        One single-item mosaic group is produced per calendar month overlapping each
        geometry's time range.

        Args:
            geometries: the spatiotemporal geometries
            query_config: the query configuration

        Returns:
            List of groups of items that should be retrieved for each geometry.

        Raises:
            ValueError: if the query is not in mosaic mode or a geometry has no
                time range.
        """
        # We only support mosaic here, other query modes don't really make sense.
        if query_config.space_mode != SpaceMode.MOSAIC:
            raise ValueError("expected mosaic space mode in the query configuration")

        all_groups = []
        for geometry in geometries:
            if geometry.time_range is None:
                raise ValueError("expected all geometries to have a time range")

            # Compute one mosaic for each month in this geometry.
            cur_date = datetime(
                geometry.time_range[0].year,
                geometry.time_range[0].month,
                1,
                tzinfo=UTC,
            )
            end_date = datetime(
                geometry.time_range[1].year,
                geometry.time_range[1].month,
                1,
                tzinfo=UTC,
            )

            month_dates: list[datetime] = []
            while cur_date <= end_date:
                month_dates.append(cur_date)
                cur_date += relativedelta(months=1)

            cur_groups = []
            for month_date in month_dates:
                # Collect Item list corresponding to the current month.
                items = []
                item_name = (
                    f"era5land_monthlyaveraged_{month_date.year}_{month_date.month}"
                )
                # Use bounds if set, otherwise use whole globe
                if self.bounds is not None:
                    bounds = self.bounds
                else:
                    bounds = [-180, -90, 180, 90]
                # Time is just the given month.
                start_date = datetime(month_date.year, month_date.month, 1, tzinfo=UTC)
                time_range = (
                    start_date,
                    start_date + relativedelta(months=1),
                )
                # Use a distinct name so we don't clobber the outer loop's
                # `geometry` variable (the original rebound it here).
                item_geometry = STGeometry(
                    WGS84_PROJECTION,
                    shapely.box(*bounds),
                    time_range,
                )
                items.append(Item(item_name, item_geometry))
                cur_groups.append(items)
            all_groups.append(cur_groups)

        return all_groups

    def deserialize_item(self, serialized_item: Any) -> Item:
        """Deserializes an item from JSON-decoded data."""
        assert isinstance(serialized_item, dict)
        return Item.deserialize(serialized_item)

    def _convert_nc_to_tif(self, nc_path: UPath, tif_path: UPath) -> None:
        """Convert a netCDF file to a GeoTIFF file.

        Args:
            nc_path: the path to the netCDF file
            tif_path: the path to the output GeoTIFF file

        Raises:
            ValueError: if the file contains no 3D variables, if variables disagree
                on the number of time steps, or if the lat/lon grid spacing does not
                match PIXEL_SIZE.
        """
        # Use a context manager so the netCDF handle is always closed, even if
        # conversion fails partway through.
        with netCDF4.Dataset(nc_path) as nc:
            # The file contains a list of variables.
            # These variables include things that are not bands that we want, such as
            # latitude, longitude, valid_time, expver, etc.
            # But the list of variables should include the bands we want in the correct
            # order. And we can distinguish those bands from other "variables" because they
            # will be 3D while the others will be scalars or 1D.
            band_arrays = []
            num_time_steps = None
            for band_name in nc.variables:
                band_data = nc.variables[band_name]
                if len(band_data.shape) != 3:
                    # This should be one of those variables that we want to skip.
                    continue

                logger.debug(
                    f"NC file {nc_path} has variable {band_name} with shape {band_data.shape}"
                )
                # Variable data is stored in a 3D array (time, height, width)
                # For hourly data, time is number of days in the month x 24 hours
                if num_time_steps is None:
                    num_time_steps = band_data.shape[0]
                elif band_data.shape[0] != num_time_steps:
                    raise ValueError(
                        f"Variable {band_name} has {band_data.shape[0]} time steps, "
                        f"but expected {num_time_steps}"
                    )
                # Original shape: (time, height, width)
                band_array = np.array(band_data[:])
                band_array = np.expand_dims(band_array, axis=1)
                band_arrays.append(band_array)

            # Fail with a clear message instead of letting np.concatenate raise a
            # cryptic error on an empty list.
            if not band_arrays:
                raise ValueError(f"no 3D band variables found in {nc_path}")

            # After concatenation: (time, num_variables, height, width)
            stacked_array = np.concatenate(band_arrays, axis=1)

            # After reshaping: (time x num_variables, height, width)
            array = stacked_array.reshape(
                -1, stacked_array.shape[2], stacked_array.shape[3]
            )

            # Get metadata for the GeoTIFF
            lat = nc.variables["latitude"][:]
            lon = nc.variables["longitude"][:]

        # Convert longitude from 0-360 to -180-180 and sort
        lon = (lon + 180) % 360 - 180
        sorted_indices = lon.argsort()
        lon = lon[sorted_indices]

        # Reorder the data array to match the new longitude order
        array = array[:, :, sorted_indices]

        # Check the spacing of the grid, make sure it's uniform
        for i in range(len(lon) - 1):
            if round(lon[i + 1] - lon[i], 1) != self.PIXEL_SIZE:
                raise ValueError(
                    f"Longitude spacing is not uniform: {lon[i + 1] - lon[i]}"
                )
        for i in range(len(lat) - 1):
            if round(lat[i + 1] - lat[i], 1) != -self.PIXEL_SIZE:
                raise ValueError(
                    f"Latitude spacing is not uniform: {lat[i + 1] - lat[i]}"
                )
        west = lon.min() - self.PIXEL_SIZE / 2
        north = lat.max() + self.PIXEL_SIZE / 2
        pixel_size_x, pixel_size_y = self.PIXEL_SIZE, self.PIXEL_SIZE
        transform = from_origin(west, north, pixel_size_x, pixel_size_y)
        crs = f"EPSG:{WGS84_EPSG}"
        with rasterio.open(
            tif_path,
            "w",
            driver="GTiff",
            height=array.shape[1],
            width=array.shape[2],
            count=array.shape[0],
            dtype=array.dtype,
            crs=crs,
            transform=transform,
        ) as dst:
            dst.write(array)

    def ingest(
        self,
        tile_store: TileStoreWithLayer,
        items: list[Item],
        geometries: list[list[STGeometry]],
    ) -> None:
        """Ingest items into the given tile store.

        This method should be overridden by subclasses.

        Args:
            tile_store: the tile store to ingest into
            items: the items to ingest
            geometries: a list of geometries needed for each item

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError("Subclasses must implement ingest method")
+
273
+
274
class ERA5LandMonthlyMeans(ERA5Land):
    """Data source for monthly averaged ERA5 land data from the Climate Data Store.

    Wraps the base ERA5Land source with the reanalysis-era5-land-monthly-means
    dataset and monthly_averaged_reanalysis product type.
    """

    def __init__(
        self,
        band_names: list[str] | None = None,
        api_key: str | None = None,
        bounds: list[float] | None = None,
        context: DataSourceContext = DataSourceContext(),
    ):
        """Initialize a new ERA5LandMonthlyMeans instance.

        Args:
            band_names: band names to acquire, matching CDS variable names with "_"
                replaced by "-". Only consulted when the context has no layer config.
            api_key: the CDS API key; falls back to the CDSAPI_KEY environment
                variable when omitted.
            bounds: optional [min_lon, min_lat, max_lon, max_lat] bounding box;
                defaults to the whole globe.
            context: the data source context.
        """
        super().__init__(
            dataset="reanalysis-era5-land-monthly-means",
            product_type="monthly_averaged_reanalysis",
            band_names=band_names,
            api_key=api_key,
            bounds=bounds,
            context=context,
        )

    def ingest(
        self,
        tile_store: TileStoreWithLayer,
        items: list[Item],
        geometries: list[list[STGeometry]],
    ) -> None:
        """Ingest items into the given tile store.

        Args:
            tile_store: the tile store to ingest into
            items: the items to ingest
            geometries: a list of geometries needed for each item
        """
        # Band names use "-" where CDS variable names use "_", so map them back.
        cds_variables = [band.replace("-", "_") for band in self.band_names]

        # CDS "area" ordering is [north, west, south, east]; this does not vary
        # per item so compute it once up front.
        if self.bounds is None:
            request_area = [90, -180, -90, 180]  # Whole globe
        else:
            west, south, east, north = self.bounds
            request_area = [north, west, south, east]

        for item in items:
            # Skip items whose rasters are already present in the tile store.
            if tile_store.is_raster_ready(item.name, self.band_names):
                continue

            # Build the CDS API request for this item's month.
            request = {
                "product_type": [self.product_type],
                "variable": cds_variables,
                "year": [f"{item.geometry.time_range[0].year}"],  # type: ignore
                "month": [
                    f"{item.geometry.time_range[0].month:02d}"  # type: ignore
                ],
                "time": ["00:00"],
                "area": request_area,
                "data_format": self.DATA_FORMAT,
                "download_format": self.DOWNLOAD_FORMAT,
            }
            logger.debug(
                f"CDS API request for year={request['year']} month={request['month']} area={request_area}"
            )
            # Download the netCDF response into a scratch directory, convert it to
            # GeoTIFF, and hand the result to the tile store.
            with tempfile.TemporaryDirectory() as scratch_dir:
                nc_fname = os.path.join(scratch_dir, f"{item.name}.nc")
                tif_fname = os.path.join(scratch_dir, f"{item.name}.tif")
                self.client.retrieve(self.dataset, request, nc_fname)
                self._convert_nc_to_tif(UPath(nc_fname), UPath(tif_fname))
                tile_store.write_raster_file(
                    item.name, self.band_names, UPath(tif_fname)
                )
362
+
363
class ERA5LandHourly(ERA5Land):
    """A data source for ingesting ERA5 land hourly data from the Copernicus Climate Data Store.

    This data source corresponds to the reanalysis-era5-land product.
    """

    def __init__(
        self,
        band_names: list[str] | None = None,
        api_key: str | None = None,
        bounds: list[float] | None = None,
        context: DataSourceContext = DataSourceContext(),
    ):
        """Initialize a new ERA5LandHourly instance.

        Args:
            band_names: list of band names to acquire. These should correspond to CDS
                variable names but with "_" replaced with "-". This will only be used
                if the layer config is missing from the context.
            api_key: the API key. If not set, it should be set via the CDSAPI_KEY
                environment variable.
            bounds: optional bounding box as [min_lon, min_lat, max_lon, max_lat].
                If not specified, the whole globe will be used.
            context: the data source context.
        """
        super().__init__(
            dataset="reanalysis-era5-land",
            product_type="reanalysis",
            band_names=band_names,
            api_key=api_key,
            bounds=bounds,
            context=context,
        )

    def ingest(
        self,
        tile_store: TileStoreWithLayer,
        items: list[Item],
        geometries: list[list[STGeometry]],
    ) -> None:
        """Ingest items into the given tile store.

        Args:
            tile_store: the tile store to ingest into
            items: the items to ingest
            geometries: a list of geometries needed for each item

        Raises:
            ValueError: if an item has no time range.
        """
        # for CDS variable names, replace "-" with "_"
        variable_names = [band.replace("-", "_") for band in self.band_names]

        for item in items:
            if tile_store.is_raster_ready(item.name, self.band_names):
                continue

            # Send the request to the CDS API
            # If area is not specified, the whole globe will be requested
            time_range = item.geometry.time_range
            if time_range is None:
                raise ValueError("Item must have a time range")

            # For hourly data, request all days in the month and all 24 hours
            start_time = time_range[0]
            year = start_time.year
            month = start_time.month

            # Last day of the month: advance to the first day of the next month
            # (relativedelta handles the December -> January rollover) and step
            # back one day. This avoids special-casing December.
            first_of_month = datetime(year, month, 1, tzinfo=UTC)
            last_day = (
                first_of_month + relativedelta(months=1) - relativedelta(days=1)
            ).day

            days = [f"{day:02d}" for day in range(1, last_day + 1)]

            # Get all 24 hours
            hours = [f"{hour:02d}:00" for hour in range(24)]

            # CDS "area" ordering is [north, west, south, east].
            if self.bounds is not None:
                min_lon, min_lat, max_lon, max_lat = self.bounds
                area = [max_lat, min_lon, min_lat, max_lon]
            else:
                area = [90, -180, -90, 180]  # Whole globe

            request = {
                "product_type": [self.product_type],
                "variable": variable_names,
                "year": [f"{year}"],
                "month": [f"{month:02d}"],
                "day": days,
                "time": hours,
                "area": area,
                "data_format": self.DATA_FORMAT,
                "download_format": self.DOWNLOAD_FORMAT,
            }
            logger.debug(
                f"CDS API request for year={request['year']} month={request['month']} days={len(days)} hours={len(hours)} area={area}"
            )
            # Download the netCDF response, convert to GeoTIFF, and store it.
            with tempfile.TemporaryDirectory() as tmp_dir:
                local_nc_fname = os.path.join(tmp_dir, f"{item.name}.nc")
                local_tif_fname = os.path.join(tmp_dir, f"{item.name}.tif")
                self.client.retrieve(self.dataset, request, local_nc_fname)
                self._convert_nc_to_tif(
                    UPath(local_nc_fname),
                    UPath(local_tif_fname),
                )
                tile_store.write_raster_file(
                    item.name, self.band_names, UPath(local_tif_fname)
                )