rslearn 0.0.1__py3-none-any.whl → 0.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. rslearn/arg_parser.py +31 -0
  2. rslearn/config/__init__.py +6 -12
  3. rslearn/config/dataset.py +520 -401
  4. rslearn/const.py +9 -15
  5. rslearn/data_sources/__init__.py +8 -23
  6. rslearn/data_sources/aws_landsat.py +242 -98
  7. rslearn/data_sources/aws_open_data.py +111 -151
  8. rslearn/data_sources/aws_sentinel1.py +131 -0
  9. rslearn/data_sources/climate_data_store.py +471 -0
  10. rslearn/data_sources/copernicus.py +884 -12
  11. rslearn/data_sources/data_source.py +43 -12
  12. rslearn/data_sources/earthdaily.py +484 -0
  13. rslearn/data_sources/earthdata_srtm.py +282 -0
  14. rslearn/data_sources/eurocrops.py +242 -0
  15. rslearn/data_sources/gcp_public_data.py +578 -222
  16. rslearn/data_sources/google_earth_engine.py +461 -135
  17. rslearn/data_sources/local_files.py +219 -150
  18. rslearn/data_sources/openstreetmap.py +51 -89
  19. rslearn/data_sources/planet.py +24 -60
  20. rslearn/data_sources/planet_basemap.py +275 -0
  21. rslearn/data_sources/planetary_computer.py +798 -0
  22. rslearn/data_sources/usda_cdl.py +195 -0
  23. rslearn/data_sources/usgs_landsat.py +115 -83
  24. rslearn/data_sources/utils.py +249 -61
  25. rslearn/data_sources/vector_source.py +1 -0
  26. rslearn/data_sources/worldcereal.py +449 -0
  27. rslearn/data_sources/worldcover.py +144 -0
  28. rslearn/data_sources/worldpop.py +153 -0
  29. rslearn/data_sources/xyz_tiles.py +150 -107
  30. rslearn/dataset/__init__.py +8 -2
  31. rslearn/dataset/add_windows.py +2 -2
  32. rslearn/dataset/dataset.py +40 -51
  33. rslearn/dataset/handler_summaries.py +131 -0
  34. rslearn/dataset/manage.py +313 -74
  35. rslearn/dataset/materialize.py +431 -107
  36. rslearn/dataset/remap.py +29 -4
  37. rslearn/dataset/storage/__init__.py +1 -0
  38. rslearn/dataset/storage/file.py +202 -0
  39. rslearn/dataset/storage/storage.py +140 -0
  40. rslearn/dataset/window.py +181 -44
  41. rslearn/lightning_cli.py +454 -0
  42. rslearn/log_utils.py +24 -0
  43. rslearn/main.py +384 -181
  44. rslearn/models/anysat.py +215 -0
  45. rslearn/models/attention_pooling.py +177 -0
  46. rslearn/models/clay/clay.py +231 -0
  47. rslearn/models/clay/configs/metadata.yaml +295 -0
  48. rslearn/models/clip.py +68 -0
  49. rslearn/models/component.py +111 -0
  50. rslearn/models/concatenate_features.py +103 -0
  51. rslearn/models/conv.py +63 -0
  52. rslearn/models/croma.py +306 -0
  53. rslearn/models/detr/__init__.py +5 -0
  54. rslearn/models/detr/box_ops.py +103 -0
  55. rslearn/models/detr/detr.py +504 -0
  56. rslearn/models/detr/matcher.py +107 -0
  57. rslearn/models/detr/position_encoding.py +114 -0
  58. rslearn/models/detr/transformer.py +429 -0
  59. rslearn/models/detr/util.py +24 -0
  60. rslearn/models/dinov3.py +177 -0
  61. rslearn/models/faster_rcnn.py +30 -28
  62. rslearn/models/feature_center_crop.py +53 -0
  63. rslearn/models/fpn.py +19 -8
  64. rslearn/models/galileo/__init__.py +5 -0
  65. rslearn/models/galileo/galileo.py +595 -0
  66. rslearn/models/galileo/single_file_galileo.py +1678 -0
  67. rslearn/models/module_wrapper.py +65 -0
  68. rslearn/models/molmo.py +69 -0
  69. rslearn/models/multitask.py +384 -28
  70. rslearn/models/olmoearth_pretrain/__init__.py +1 -0
  71. rslearn/models/olmoearth_pretrain/model.py +421 -0
  72. rslearn/models/olmoearth_pretrain/norm.py +86 -0
  73. rslearn/models/panopticon.py +170 -0
  74. rslearn/models/panopticon_data/sensors/drone.yaml +32 -0
  75. rslearn/models/panopticon_data/sensors/enmap.yaml +904 -0
  76. rslearn/models/panopticon_data/sensors/goes.yaml +9 -0
  77. rslearn/models/panopticon_data/sensors/himawari.yaml +9 -0
  78. rslearn/models/panopticon_data/sensors/intuition.yaml +606 -0
  79. rslearn/models/panopticon_data/sensors/landsat8.yaml +84 -0
  80. rslearn/models/panopticon_data/sensors/modis_terra.yaml +99 -0
  81. rslearn/models/panopticon_data/sensors/qb2_ge1.yaml +34 -0
  82. rslearn/models/panopticon_data/sensors/sentinel1.yaml +85 -0
  83. rslearn/models/panopticon_data/sensors/sentinel2.yaml +97 -0
  84. rslearn/models/panopticon_data/sensors/superdove.yaml +60 -0
  85. rslearn/models/panopticon_data/sensors/wv23.yaml +63 -0
  86. rslearn/models/pick_features.py +17 -10
  87. rslearn/models/pooling_decoder.py +60 -7
  88. rslearn/models/presto/__init__.py +5 -0
  89. rslearn/models/presto/presto.py +297 -0
  90. rslearn/models/presto/single_file_presto.py +926 -0
  91. rslearn/models/prithvi.py +1147 -0
  92. rslearn/models/resize_features.py +59 -0
  93. rslearn/models/sam2_enc.py +13 -9
  94. rslearn/models/satlaspretrain.py +38 -18
  95. rslearn/models/simple_time_series.py +188 -77
  96. rslearn/models/singletask.py +24 -13
  97. rslearn/models/ssl4eo_s12.py +40 -30
  98. rslearn/models/swin.py +44 -32
  99. rslearn/models/task_embedding.py +250 -0
  100. rslearn/models/terramind.py +256 -0
  101. rslearn/models/trunk.py +139 -0
  102. rslearn/models/unet.py +68 -22
  103. rslearn/models/upsample.py +48 -0
  104. rslearn/models/use_croma.py +508 -0
  105. rslearn/template_params.py +26 -0
  106. rslearn/tile_stores/__init__.py +41 -18
  107. rslearn/tile_stores/default.py +409 -0
  108. rslearn/tile_stores/tile_store.py +236 -132
  109. rslearn/train/all_patches_dataset.py +530 -0
  110. rslearn/train/callbacks/adapters.py +53 -0
  111. rslearn/train/callbacks/freeze_unfreeze.py +348 -17
  112. rslearn/train/callbacks/gradients.py +129 -0
  113. rslearn/train/callbacks/peft.py +116 -0
  114. rslearn/train/data_module.py +444 -20
  115. rslearn/train/dataset.py +588 -235
  116. rslearn/train/lightning_module.py +192 -62
  117. rslearn/train/model_context.py +88 -0
  118. rslearn/train/optimizer.py +31 -0
  119. rslearn/train/prediction_writer.py +319 -84
  120. rslearn/train/scheduler.py +92 -0
  121. rslearn/train/tasks/classification.py +55 -28
  122. rslearn/train/tasks/detection.py +132 -76
  123. rslearn/train/tasks/embedding.py +120 -0
  124. rslearn/train/tasks/multi_task.py +28 -14
  125. rslearn/train/tasks/per_pixel_regression.py +291 -0
  126. rslearn/train/tasks/regression.py +161 -44
  127. rslearn/train/tasks/segmentation.py +428 -53
  128. rslearn/train/tasks/task.py +6 -5
  129. rslearn/train/transforms/__init__.py +1 -1
  130. rslearn/train/transforms/concatenate.py +54 -10
  131. rslearn/train/transforms/crop.py +29 -11
  132. rslearn/train/transforms/flip.py +18 -6
  133. rslearn/train/transforms/mask.py +78 -0
  134. rslearn/train/transforms/normalize.py +101 -17
  135. rslearn/train/transforms/pad.py +19 -7
  136. rslearn/train/transforms/resize.py +83 -0
  137. rslearn/train/transforms/select_bands.py +76 -0
  138. rslearn/train/transforms/sentinel1.py +75 -0
  139. rslearn/train/transforms/transform.py +89 -70
  140. rslearn/utils/__init__.py +2 -6
  141. rslearn/utils/array.py +8 -6
  142. rslearn/utils/feature.py +2 -2
  143. rslearn/utils/fsspec.py +90 -1
  144. rslearn/utils/geometry.py +347 -7
  145. rslearn/utils/get_utm_ups_crs.py +2 -3
  146. rslearn/utils/grid_index.py +5 -5
  147. rslearn/utils/jsonargparse.py +178 -0
  148. rslearn/utils/mp.py +4 -3
  149. rslearn/utils/raster_format.py +268 -116
  150. rslearn/utils/rtree_index.py +64 -17
  151. rslearn/utils/sqlite_index.py +7 -1
  152. rslearn/utils/vector_format.py +252 -97
  153. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/METADATA +532 -283
  154. rslearn-0.0.21.dist-info/RECORD +167 -0
  155. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/WHEEL +1 -1
  156. rslearn-0.0.21.dist-info/licenses/NOTICE +115 -0
  157. rslearn/data_sources/raster_source.py +0 -309
  158. rslearn/models/registry.py +0 -5
  159. rslearn/tile_stores/file.py +0 -242
  160. rslearn/utils/mgrs.py +0 -24
  161. rslearn/utils/utils.py +0 -22
  162. rslearn-0.0.1.dist-info/RECORD +0 -88
  163. /rslearn/{data_sources/geotiff.py → py.typed} +0 -0
  164. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/entry_points.txt +0 -0
  165. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info/licenses}/LICENSE +0 -0
  166. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,282 @@
1
+ """Elevation data from the Shuttle Radar Topography Mission via NASA Earthdata."""
2
+
3
+ import math
4
+ import os
5
+ import tempfile
6
+ import zipfile
7
+ from datetime import timedelta
8
+ from typing import Any
9
+
10
+ import requests
11
+ import requests.auth
12
+ import shapely
13
+ from upath import UPath
14
+
15
+ from rslearn.config import QueryConfig, SpaceMode
16
+ from rslearn.const import WGS84_PROJECTION
17
+ from rslearn.data_sources import DataSource, DataSourceContext, Item
18
+ from rslearn.log_utils import get_logger
19
+ from rslearn.tile_stores import TileStoreWithLayer
20
+ from rslearn.utils.geometry import STGeometry
21
+
22
+ logger = get_logger(__name__)
23
+
24
+
25
+ class SRTM(DataSource):
26
+ """Data source for SRTM elevation data using NASA Earthdata credentials.
27
+
28
+ See https://e4ftl01.cr.usgs.gov/MEASURES/SRTMGL1.003/ and
29
+ https://dwtkns.com/srtm30m/ for details about the data.
30
+
31
+ The data is split into 1x1-degree tiles, where the filename ends with e.g.
32
+ S28W055.SRTMGL1.hgt.zip (so only the first seven characters change).
33
+
34
+ These URLs can only be accessed with a NASA Earthdata username and password.
35
+
36
+ The zip file contains a single hgt file which can be read by rasterio. It has a
37
+ single 16-bit signed integer band indicating the elevation.
38
+
39
+ Items from this data source do not come with a time range. The band name will match
40
+ that specified in the band set, which should have a single band.
41
+ """
42
+
43
+ BASE_URL = "https://e4ftl01.cr.usgs.gov/MEASURES/SRTMGL1.003/2000.02.11/"
44
+ FILENAME_SUFFIX = ".SRTMGL1.hgt.zip"
45
+
46
+ def __init__(
47
+ self,
48
+ username: str | None = None,
49
+ password: str | None = None,
50
+ timeout: timedelta = timedelta(seconds=10),
51
+ context: DataSourceContext = DataSourceContext(),
52
+ ):
53
+ """Initialize a new SRTM instance.
54
+
55
+ Args:
56
+ username: NASA Earthdata account username. If not set, it is read from the
57
+ NASA_EARTHDATA_USERNAME environment variable.
58
+ password: NASA Earthdata account password. If not set, it is read from the
59
+ NASA_EARTHDATA_PASSWORD environment variable.
60
+ timeout: timeout for requests.
61
+ context: the data source context.
62
+ """
63
+ # Get band name from context if possible, falling back to "srtm".
64
+ if context.layer_config is not None:
65
+ if len(context.layer_config.band_sets) != 1:
66
+ raise ValueError("expected a single band set")
67
+ if len(context.layer_config.band_sets[0].bands) != 1:
68
+ raise ValueError("expected band set to have a single band")
69
+ self.band_name = context.layer_config.band_sets[0].bands[0]
70
+ else:
71
+ self.band_name = "srtm"
72
+
73
+ self.timeout = timeout
74
+
75
+ if username is None:
76
+ username = os.environ["NASA_EARTHDATA_USERNAME"]
77
+ self.username = username
78
+
79
+ if password is None:
80
+ password = os.environ["NASA_EARTHDATA_PASSWORD"]
81
+ self.password = password
82
+
83
+ self.session = requests.session()
84
+
85
+ def get_item_by_name(self, name: str) -> Item:
86
+ """Gets an item by name.
87
+
88
+ Args:
89
+ name: the name of the item to get. For SRTM, the item name is the filename
90
+ of the zip file containing the hgt file.
91
+
92
+ Returns:
93
+ the Item object
94
+ """
95
+ if not name.endswith(self.FILENAME_SUFFIX):
96
+ raise ValueError(
97
+ f"expected item name to end with {self.FILENAME_SUFFIX}, but got {name}"
98
+ )
99
+ # Parse the first seven characters, e.g. S28W055.
100
+ # We do this to reconstruct the geometry of the item.
101
+ lat_sign = name[0]
102
+ lat_degrees = int(name[1:3])
103
+ lon_sign = name[4]
104
+ lon_degrees = int(name[5:8])
105
+
106
+ if lat_sign == "N":
107
+ lat_min = lat_degrees
108
+ elif lat_sign == "S":
109
+ lat_min = -lat_degrees
110
+ else:
111
+ raise ValueError(f"invalid item name {name}")
112
+
113
+ if lon_sign == "E":
114
+ lon_min = lon_degrees
115
+ elif lon_sign == "W":
116
+ lon_min = -lon_degrees
117
+ else:
118
+ raise ValueError(f"invalid item name {name}")
119
+
120
+ geometry = STGeometry(
121
+ WGS84_PROJECTION,
122
+ shapely.box(lon_min, lat_min, lon_min + 1, lat_min + 1),
123
+ None,
124
+ )
125
+ return Item(name, geometry)
126
+
127
+ def _lon_lat_to_item(self, lon_min: int, lat_min: int) -> Item:
128
+ """Get an item based on the 1x1 longitude/latitude grid.
129
+
130
+ Args:
131
+ lon_min: the starting longitude integer of the grid cell.
132
+ lat_min: the starting latitude integer of the grid cell.
133
+
134
+ Returns:
135
+ the Item object.
136
+ """
137
+ # Construct the filename for this grid cell.
138
+ # The item name is just the filename.
139
+ if lon_min < 0:
140
+ lon_part = f"W{-lon_min:03d}"
141
+ else:
142
+ lon_part = f"E{lon_min:03d}"
143
+ if lat_min < 0:
144
+ lat_part = f"S{-lat_min:02d}"
145
+ else:
146
+ lat_part = f"N{lat_min:02d}"
147
+ fname = lat_part + lon_part + self.FILENAME_SUFFIX
148
+
149
+ # We also need the geometry for the item.
150
+ geometry = STGeometry(
151
+ WGS84_PROJECTION,
152
+ shapely.box(lon_min, lat_min, lon_min + 1, lat_min + 1),
153
+ None,
154
+ )
155
+
156
+ return Item(fname, geometry)
157
+
158
+ def get_items(
159
+ self, geometries: list[STGeometry], query_config: QueryConfig
160
+ ) -> list[list[list[Item]]]:
161
+ """Get a list of items in the data source intersecting the given geometries.
162
+
163
+ Args:
164
+ geometries: the spatiotemporal geometries
165
+ query_config: the query configuration
166
+
167
+ Returns:
168
+ List of groups of items that should be retrieved for each geometry.
169
+ """
170
+ # We only support mosaic here, other query modes don't really make sense.
171
+ if query_config.space_mode != SpaceMode.MOSAIC or query_config.max_matches != 1:
172
+ raise ValueError(
173
+ "expected mosaic with max_matches=1 for the query configuration"
174
+ )
175
+
176
+ groups = []
177
+ for geometry in geometries:
178
+ # We iterate over each 1x1 cell that this geometry intersects and include
179
+ # the corresponing item in this item group.
180
+ # Since it is a mosaic with one match, there will just be one item group
181
+ # for each item.
182
+ wgs84_geometry = geometry.to_projection(WGS84_PROJECTION)
183
+ shp_bounds = wgs84_geometry.shp.bounds
184
+ cell_bounds = (
185
+ math.floor(shp_bounds[0]),
186
+ math.floor(shp_bounds[1]),
187
+ math.ceil(shp_bounds[2]),
188
+ math.ceil(shp_bounds[3]),
189
+ )
190
+ # lon_min/lat_min are the lower range of each cell.
191
+ items = []
192
+ for lon_min in range(cell_bounds[0], cell_bounds[2]):
193
+ for lat_min in range(cell_bounds[1], cell_bounds[3]):
194
+ items.append(self._lon_lat_to_item(lon_min, lat_min))
195
+
196
+ logger.debug(f"Got {len(items)} items (grid cells) for geometry")
197
+ groups.append([items])
198
+
199
+ return groups
200
+
201
+ def deserialize_item(self, serialized_item: Any) -> Item:
202
+ """Deserializes an item from JSON-decoded data."""
203
+ assert isinstance(serialized_item, dict)
204
+ return Item.deserialize(serialized_item)
205
+
206
+ def ingest(
207
+ self,
208
+ tile_store: TileStoreWithLayer,
209
+ items: list[Item],
210
+ geometries: list[list[STGeometry]],
211
+ ) -> None:
212
+ """Ingest items into the given tile store.
213
+
214
+ Args:
215
+ tile_store: the tile store to ingest into
216
+ items: the items to ingest
217
+ geometries: a list of geometries needed for each item
218
+ """
219
+ for item in items:
220
+ if tile_store.is_raster_ready(item.name, [self.band_name]):
221
+ continue
222
+
223
+ # Download the item.
224
+ # We first attempt to access it directly, which works if we have already
225
+ # authenticated. If not, we get redirected to a login endpoint where we
226
+ # need to use basic authentication; the endpoint will redirect us back to
227
+ # the original URL.
228
+ url = self.BASE_URL + item.name
229
+ logger.debug(f"Downloading SRTM data for {item.name} from {url}")
230
+
231
+ # Try to access directly.
232
+ response = self.session.get(
233
+ url,
234
+ stream=True,
235
+ timeout=self.timeout.total_seconds(),
236
+ allow_redirects=False,
237
+ )
238
+
239
+ if response.status_code == 302:
240
+ # Encountered redirect, so set response to actually access the redirect
241
+ # URL. This time we follow redirects since it will take us back to the
242
+ # original URL.
243
+ redirect_url = response.headers["Location"]
244
+ logger.debug(f"Following redirect to {redirect_url}")
245
+ auth = requests.auth.HTTPBasicAuth(self.username, self.password)
246
+ response = self.session.get(
247
+ redirect_url,
248
+ stream=True,
249
+ timeout=self.timeout.total_seconds(),
250
+ auth=auth,
251
+ )
252
+
253
+ if response.status_code == 404:
254
+ # Some grid cells don't exist so this isn't a big issue.
255
+ logger.warning(
256
+ f"Skipping item {item.name} because there is no data at that cell"
257
+ )
258
+ continue
259
+ response.raise_for_status()
260
+
261
+ with tempfile.TemporaryDirectory() as tmp_dir:
262
+ # Store it in temporary directory.
263
+ zip_fname = os.path.join(tmp_dir, "data.zip")
264
+ with open(zip_fname, "wb") as f:
265
+ for chunk in response.iter_content(chunk_size=8192):
266
+ f.write(chunk)
267
+
268
+ # Extract the .hgt file.
269
+ logger.debug(f"Extracting data for {item.name}")
270
+ with zipfile.ZipFile(zip_fname) as zip_f:
271
+ member_names = zip_f.namelist()
272
+ if len(member_names) != 1:
273
+ raise ValueError(
274
+ f"expected SRTM zip to have one member but got {member_names}"
275
+ )
276
+ local_fname = zip_f.extract(member_names[0], path=tmp_dir)
277
+
278
+ # Now we can ingest it.
279
+ logger.debug(f"Ingesting data for {item.name}")
280
+ tile_store.write_raster_file(
281
+ item.name, [self.band_name], UPath(local_fname)
282
+ )
@@ -0,0 +1,242 @@
1
+ """Data source for vector EuroCrops crop type data."""
2
+
3
+ import glob
4
+ import os
5
+ import tempfile
6
+ import zipfile
7
+ from datetime import UTC, datetime, timedelta
8
+ from typing import Any
9
+
10
+ import fiona
11
+ import requests
12
+ from rasterio.crs import CRS
13
+
14
+ from rslearn.config import QueryConfig
15
+ from rslearn.const import WGS84_PROJECTION
16
+ from rslearn.data_sources import DataSource, DataSourceContext, Item
17
+ from rslearn.data_sources.utils import match_candidate_items_to_window
18
+ from rslearn.log_utils import get_logger
19
+ from rslearn.tile_stores import TileStoreWithLayer
20
+ from rslearn.utils.feature import Feature
21
+ from rslearn.utils.geometry import Projection, STGeometry, get_global_geometry
22
+
23
+ logger = get_logger(__name__)
24
+
25
+
26
+ class EuroCropsItem(Item):
27
+ """An item in the EuroCrops data source.
28
+
29
+ For simplicity, we have just one item per year, so each item combines all of the
30
+ country-level files for that year.
31
+ """
32
+
33
+ def __init__(self, name: str, geometry: STGeometry, zip_fnames: list[str]):
34
+ """Creates a new EuroCropsItem.
35
+
36
+ Args:
37
+ name: unique name of the item. It is just the year that this item
38
+ corresponds to.
39
+ geometry: the spatial and temporal extent of the item
40
+ zip_fnames: the filenames of the zip files that contain country-level crop
41
+ type data for this year.
42
+ """
43
+ super().__init__(name, geometry)
44
+ self.zip_fnames = zip_fnames
45
+
46
+ def serialize(self) -> dict:
47
+ """Serializes the item to a JSON-encodable dictionary."""
48
+ d = super().serialize()
49
+ d["zip_fnames"] = self.zip_fnames
50
+ return d
51
+
52
+ @staticmethod
53
+ def deserialize(d: dict) -> "EuroCropsItem":
54
+ """Deserializes an item from a JSON-decoded dictionary."""
55
+ item = super(EuroCropsItem, EuroCropsItem).deserialize(d)
56
+ return EuroCropsItem(
57
+ name=item.name, geometry=item.geometry, zip_fnames=d["zip_fnames"]
58
+ )
59
+
60
+
61
+ class EuroCrops(DataSource[EuroCropsItem]):
62
+ """A data source for EuroCrops vector data (v11).
63
+
64
+ See https://zenodo.org/records/14094196 for details.
65
+
66
+ While the source data is split into country-level files, this data source uses one
67
+ item per year for simplicity. So each item corresponds to all of the country-level
68
+ files for that year.
69
+
70
+ Note that the RO_ny.zip file is not used.
71
+ """
72
+
73
+ BASE_URL = "https://zenodo.org/records/14094196/files/"
74
+ FILENAMES_BY_YEAR = {
75
+ 2018: [
76
+ "FR_2018.zip",
77
+ ],
78
+ 2019: [
79
+ "DK_2019.zip",
80
+ ],
81
+ 2020: [
82
+ "ES_NA_2020.zip",
83
+ "FI_2020.zip",
84
+ "HR_2020.zip",
85
+ "NL_2020.zip",
86
+ ],
87
+ 2021: [
88
+ "AT_2021.zip",
89
+ "BE_VLG_2021.zip",
90
+ "BE_WAL_2021.zip",
91
+ "EE_2021.zip",
92
+ "LT_2021.zip",
93
+ "LV_2021.zip",
94
+ "PT_2021.zip",
95
+ "SE_2021.zip",
96
+ "SI_2021.zip",
97
+ "SK_2021.zip",
98
+ ],
99
+ 2023: [
100
+ "CZ_2023.zip",
101
+ "DE_BB_2023.zip",
102
+ "DE_LS_2021.zip",
103
+ "DE_NRW_2021.zip",
104
+ "ES_2023.zip",
105
+ "IE_2023.zip",
106
+ ],
107
+ }
108
+ TIMEOUT = timedelta(seconds=10)
109
+
110
+ def __init__(self, context: DataSourceContext = DataSourceContext()):
111
+ """Create a new EuroCrops."""
112
+ pass
113
+
114
+ def _get_all_items(self) -> list[EuroCropsItem]:
115
+ """Get a list of all available items in the data source."""
116
+ items: list[EuroCropsItem] = []
117
+ for year, fnames in self.FILENAMES_BY_YEAR.items():
118
+ items.append(
119
+ EuroCropsItem(
120
+ str(year),
121
+ get_global_geometry(
122
+ time_range=(
123
+ datetime(year, 1, 1, tzinfo=UTC),
124
+ datetime(year + 1, 1, 1, tzinfo=UTC),
125
+ ),
126
+ ),
127
+ fnames,
128
+ )
129
+ )
130
+ return items
131
+
132
+ def get_items(
133
+ self, geometries: list[STGeometry], query_config: QueryConfig
134
+ ) -> list[list[list[EuroCropsItem]]]:
135
+ """Get a list of items in the data source intersecting the given geometries.
136
+
137
+ Args:
138
+ geometries: the spatiotemporal geometries
139
+ query_config: the query configuration
140
+
141
+ Returns:
142
+ List of groups of items that should be retrieved for each geometry.
143
+ """
144
+ wgs84_geometries = [
145
+ geometry.to_projection(WGS84_PROJECTION) for geometry in geometries
146
+ ]
147
+ all_items = self._get_all_items()
148
+ groups = []
149
+ for geometry in wgs84_geometries:
150
+ cur_groups = match_candidate_items_to_window(
151
+ geometry, all_items, query_config
152
+ )
153
+ groups.append(cur_groups)
154
+ return groups
155
+
156
+ def deserialize_item(self, serialized_item: Any) -> EuroCropsItem:
157
+ """Deserializes an item from JSON-decoded data."""
158
+ return EuroCropsItem.deserialize(serialized_item)
159
+
160
+ def _extract_features(self, fname: str) -> list[Feature]:
161
+ """Download the given zip file, extract shapefile, and return list of features."""
162
+ with tempfile.TemporaryDirectory() as tmp_dir:
163
+ # Download the zip file.
164
+ url = self.BASE_URL + fname
165
+ logger.debug(f"Downloading zip file from {url}")
166
+ response = requests.get(
167
+ url,
168
+ stream=True,
169
+ timeout=self.TIMEOUT.total_seconds(),
170
+ allow_redirects=False,
171
+ )
172
+ response.raise_for_status()
173
+ zip_fname = os.path.join(tmp_dir, "data.zip")
174
+ with open(zip_fname, "wb") as f:
175
+ for chunk in response.iter_content(chunk_size=8192):
176
+ f.write(chunk)
177
+
178
+ # Extract all of the files and look for shapefile filename.
179
+ logger.debug(f"Extracting zip file {fname}")
180
+ with zipfile.ZipFile(zip_fname) as zip_f:
181
+ zip_f.extractall(path=tmp_dir)
182
+
183
+ # The shapefiles or geopackage files can appear at any level in the hierarchy.
184
+ # Most zip files contain one but some contain multiple (one per region).
185
+ shp_fnames = glob.glob(
186
+ "**/*.shp", root_dir=tmp_dir, recursive=True
187
+ ) + glob.glob("**/*.gpkg", root_dir=tmp_dir, recursive=True)
188
+ if len(shp_fnames) == 0:
189
+ tmp_dir_fnames = os.listdir(tmp_dir)
190
+ raise ValueError(
191
+ f"expected {fname} to contain .shp file but none found (matches={shp_fnames}, ls={tmp_dir_fnames})"
192
+ )
193
+
194
+ # Load the features from the shapefile(s).
195
+ features = []
196
+ for shp_fname in shp_fnames:
197
+ logger.debug(f"Loading feature list from {shp_fname}")
198
+ with fiona.open(os.path.join(tmp_dir, shp_fname)) as src:
199
+ crs = CRS.from_wkt(src.crs.to_wkt())
200
+ # Normal GeoJSON should have coordinates in CRS coordinates, i.e. it
201
+ # should be 1 projection unit/pixel.
202
+ projection = Projection(crs, 1, 1)
203
+
204
+ for feat in src:
205
+ features.append(
206
+ Feature.from_geojson(
207
+ projection,
208
+ {
209
+ "type": "Feature",
210
+ "geometry": dict(feat.geometry),
211
+ "properties": dict(feat.properties),
212
+ },
213
+ )
214
+ )
215
+
216
+ return features
217
+
218
+ def ingest(
219
+ self,
220
+ tile_store: TileStoreWithLayer,
221
+ items: list[EuroCropsItem],
222
+ geometries: list[list[STGeometry]],
223
+ ) -> None:
224
+ """Ingest items into the given tile store.
225
+
226
+ Args:
227
+ tile_store: the tile store to ingest into
228
+ items: the items to ingest
229
+ geometries: a list of geometries needed for each item
230
+ """
231
+ for item in items:
232
+ if tile_store.is_vector_ready(item.name):
233
+ continue
234
+
235
+ # Get features across all shapefiles.
236
+ features: list[Feature] = []
237
+ for fname in item.zip_fnames:
238
+ logger.debug(f"Getting features from {fname} for item {item.name}")
239
+ features.extend(self._extract_features(fname))
240
+
241
+ logger.debug(f"Writing features for {item.name} to the tile store")
242
+ tile_store.write_vector(item.name, features)