rslearn 0.0.1__py3-none-any.whl → 0.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rslearn/arg_parser.py +31 -0
- rslearn/config/__init__.py +6 -12
- rslearn/config/dataset.py +520 -401
- rslearn/const.py +9 -15
- rslearn/data_sources/__init__.py +8 -23
- rslearn/data_sources/aws_landsat.py +242 -98
- rslearn/data_sources/aws_open_data.py +111 -151
- rslearn/data_sources/aws_sentinel1.py +131 -0
- rslearn/data_sources/climate_data_store.py +471 -0
- rslearn/data_sources/copernicus.py +884 -12
- rslearn/data_sources/data_source.py +43 -12
- rslearn/data_sources/earthdaily.py +484 -0
- rslearn/data_sources/earthdata_srtm.py +282 -0
- rslearn/data_sources/eurocrops.py +242 -0
- rslearn/data_sources/gcp_public_data.py +578 -222
- rslearn/data_sources/google_earth_engine.py +461 -135
- rslearn/data_sources/local_files.py +219 -150
- rslearn/data_sources/openstreetmap.py +51 -89
- rslearn/data_sources/planet.py +24 -60
- rslearn/data_sources/planet_basemap.py +275 -0
- rslearn/data_sources/planetary_computer.py +798 -0
- rslearn/data_sources/usda_cdl.py +195 -0
- rslearn/data_sources/usgs_landsat.py +115 -83
- rslearn/data_sources/utils.py +249 -61
- rslearn/data_sources/vector_source.py +1 -0
- rslearn/data_sources/worldcereal.py +449 -0
- rslearn/data_sources/worldcover.py +144 -0
- rslearn/data_sources/worldpop.py +153 -0
- rslearn/data_sources/xyz_tiles.py +150 -107
- rslearn/dataset/__init__.py +8 -2
- rslearn/dataset/add_windows.py +2 -2
- rslearn/dataset/dataset.py +40 -51
- rslearn/dataset/handler_summaries.py +131 -0
- rslearn/dataset/manage.py +313 -74
- rslearn/dataset/materialize.py +431 -107
- rslearn/dataset/remap.py +29 -4
- rslearn/dataset/storage/__init__.py +1 -0
- rslearn/dataset/storage/file.py +202 -0
- rslearn/dataset/storage/storage.py +140 -0
- rslearn/dataset/window.py +181 -44
- rslearn/lightning_cli.py +454 -0
- rslearn/log_utils.py +24 -0
- rslearn/main.py +384 -181
- rslearn/models/anysat.py +215 -0
- rslearn/models/attention_pooling.py +177 -0
- rslearn/models/clay/clay.py +231 -0
- rslearn/models/clay/configs/metadata.yaml +295 -0
- rslearn/models/clip.py +68 -0
- rslearn/models/component.py +111 -0
- rslearn/models/concatenate_features.py +103 -0
- rslearn/models/conv.py +63 -0
- rslearn/models/croma.py +306 -0
- rslearn/models/detr/__init__.py +5 -0
- rslearn/models/detr/box_ops.py +103 -0
- rslearn/models/detr/detr.py +504 -0
- rslearn/models/detr/matcher.py +107 -0
- rslearn/models/detr/position_encoding.py +114 -0
- rslearn/models/detr/transformer.py +429 -0
- rslearn/models/detr/util.py +24 -0
- rslearn/models/dinov3.py +177 -0
- rslearn/models/faster_rcnn.py +30 -28
- rslearn/models/feature_center_crop.py +53 -0
- rslearn/models/fpn.py +19 -8
- rslearn/models/galileo/__init__.py +5 -0
- rslearn/models/galileo/galileo.py +595 -0
- rslearn/models/galileo/single_file_galileo.py +1678 -0
- rslearn/models/module_wrapper.py +65 -0
- rslearn/models/molmo.py +69 -0
- rslearn/models/multitask.py +384 -28
- rslearn/models/olmoearth_pretrain/__init__.py +1 -0
- rslearn/models/olmoearth_pretrain/model.py +421 -0
- rslearn/models/olmoearth_pretrain/norm.py +86 -0
- rslearn/models/panopticon.py +170 -0
- rslearn/models/panopticon_data/sensors/drone.yaml +32 -0
- rslearn/models/panopticon_data/sensors/enmap.yaml +904 -0
- rslearn/models/panopticon_data/sensors/goes.yaml +9 -0
- rslearn/models/panopticon_data/sensors/himawari.yaml +9 -0
- rslearn/models/panopticon_data/sensors/intuition.yaml +606 -0
- rslearn/models/panopticon_data/sensors/landsat8.yaml +84 -0
- rslearn/models/panopticon_data/sensors/modis_terra.yaml +99 -0
- rslearn/models/panopticon_data/sensors/qb2_ge1.yaml +34 -0
- rslearn/models/panopticon_data/sensors/sentinel1.yaml +85 -0
- rslearn/models/panopticon_data/sensors/sentinel2.yaml +97 -0
- rslearn/models/panopticon_data/sensors/superdove.yaml +60 -0
- rslearn/models/panopticon_data/sensors/wv23.yaml +63 -0
- rslearn/models/pick_features.py +17 -10
- rslearn/models/pooling_decoder.py +60 -7
- rslearn/models/presto/__init__.py +5 -0
- rslearn/models/presto/presto.py +297 -0
- rslearn/models/presto/single_file_presto.py +926 -0
- rslearn/models/prithvi.py +1147 -0
- rslearn/models/resize_features.py +59 -0
- rslearn/models/sam2_enc.py +13 -9
- rslearn/models/satlaspretrain.py +38 -18
- rslearn/models/simple_time_series.py +188 -77
- rslearn/models/singletask.py +24 -13
- rslearn/models/ssl4eo_s12.py +40 -30
- rslearn/models/swin.py +44 -32
- rslearn/models/task_embedding.py +250 -0
- rslearn/models/terramind.py +256 -0
- rslearn/models/trunk.py +139 -0
- rslearn/models/unet.py +68 -22
- rslearn/models/upsample.py +48 -0
- rslearn/models/use_croma.py +508 -0
- rslearn/template_params.py +26 -0
- rslearn/tile_stores/__init__.py +41 -18
- rslearn/tile_stores/default.py +409 -0
- rslearn/tile_stores/tile_store.py +236 -132
- rslearn/train/all_patches_dataset.py +530 -0
- rslearn/train/callbacks/adapters.py +53 -0
- rslearn/train/callbacks/freeze_unfreeze.py +348 -17
- rslearn/train/callbacks/gradients.py +129 -0
- rslearn/train/callbacks/peft.py +116 -0
- rslearn/train/data_module.py +444 -20
- rslearn/train/dataset.py +588 -235
- rslearn/train/lightning_module.py +192 -62
- rslearn/train/model_context.py +88 -0
- rslearn/train/optimizer.py +31 -0
- rslearn/train/prediction_writer.py +319 -84
- rslearn/train/scheduler.py +92 -0
- rslearn/train/tasks/classification.py +55 -28
- rslearn/train/tasks/detection.py +132 -76
- rslearn/train/tasks/embedding.py +120 -0
- rslearn/train/tasks/multi_task.py +28 -14
- rslearn/train/tasks/per_pixel_regression.py +291 -0
- rslearn/train/tasks/regression.py +161 -44
- rslearn/train/tasks/segmentation.py +428 -53
- rslearn/train/tasks/task.py +6 -5
- rslearn/train/transforms/__init__.py +1 -1
- rslearn/train/transforms/concatenate.py +54 -10
- rslearn/train/transforms/crop.py +29 -11
- rslearn/train/transforms/flip.py +18 -6
- rslearn/train/transforms/mask.py +78 -0
- rslearn/train/transforms/normalize.py +101 -17
- rslearn/train/transforms/pad.py +19 -7
- rslearn/train/transforms/resize.py +83 -0
- rslearn/train/transforms/select_bands.py +76 -0
- rslearn/train/transforms/sentinel1.py +75 -0
- rslearn/train/transforms/transform.py +89 -70
- rslearn/utils/__init__.py +2 -6
- rslearn/utils/array.py +8 -6
- rslearn/utils/feature.py +2 -2
- rslearn/utils/fsspec.py +90 -1
- rslearn/utils/geometry.py +347 -7
- rslearn/utils/get_utm_ups_crs.py +2 -3
- rslearn/utils/grid_index.py +5 -5
- rslearn/utils/jsonargparse.py +178 -0
- rslearn/utils/mp.py +4 -3
- rslearn/utils/raster_format.py +268 -116
- rslearn/utils/rtree_index.py +64 -17
- rslearn/utils/sqlite_index.py +7 -1
- rslearn/utils/vector_format.py +252 -97
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/METADATA +532 -283
- rslearn-0.0.21.dist-info/RECORD +167 -0
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/WHEEL +1 -1
- rslearn-0.0.21.dist-info/licenses/NOTICE +115 -0
- rslearn/data_sources/raster_source.py +0 -309
- rslearn/models/registry.py +0 -5
- rslearn/tile_stores/file.py +0 -242
- rslearn/utils/mgrs.py +0 -24
- rslearn/utils/utils.py +0 -22
- rslearn-0.0.1.dist-info/RECORD +0 -88
- /rslearn/{data_sources/geotiff.py → py.typed} +0 -0
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/entry_points.txt +0 -0
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info/licenses}/LICENSE +0 -0
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,449 @@
|
|
|
1
|
+
"""Data source for ESA WorldCover 2021."""
|
|
2
|
+
|
|
3
|
+
import functools
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import shutil
|
|
7
|
+
import tempfile
|
|
8
|
+
import zipfile
|
|
9
|
+
|
|
10
|
+
import requests
|
|
11
|
+
from fsspec.implementations.local import LocalFileSystem
|
|
12
|
+
from upath import UPath
|
|
13
|
+
|
|
14
|
+
from rslearn.config import LayerType
|
|
15
|
+
from rslearn.data_sources.local_files import LocalFiles, RasterItemSpec
|
|
16
|
+
from rslearn.log_utils import get_logger
|
|
17
|
+
from rslearn.utils.fsspec import get_upath_local, join_upath, open_atomic
|
|
18
|
+
|
|
19
|
+
from .data_source import DataSourceContext, Item
|
|
20
|
+
|
|
21
|
+
logger = get_logger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class WorldCereal(LocalFiles):
|
|
25
|
+
"""A data source for the ESA WorldCereal 2021 agricultural land cover map.
|
|
26
|
+
|
|
27
|
+
For details about the land cover map, see https://esa-worldcereal.org/en.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
ZENODO_RECORD_ID = 7875105
|
|
31
|
+
ZENODO_URL = f"https://zenodo.org/api/deposit/depositions/{ZENODO_RECORD_ID}/files"
|
|
32
|
+
|
|
33
|
+
# these are the subset of filenames we want to download, which contain the
|
|
34
|
+
# model confidence and classification values. This defines the order of the bands in the
|
|
35
|
+
# final output tif files
|
|
36
|
+
ZIP_FILENAMES = [
|
|
37
|
+
"WorldCereal_2021_tc-annual_temporarycrops_confidence.zip",
|
|
38
|
+
"WorldCereal_2021_tc-annual_temporarycrops_classification.zip",
|
|
39
|
+
"WorldCereal_2021_tc-maize-main_irrigation_confidence.zip",
|
|
40
|
+
"WorldCereal_2021_tc-maize-main_irrigation_classification.zip",
|
|
41
|
+
"WorldCereal_2021_tc-maize-main_maize_confidence.zip",
|
|
42
|
+
"WorldCereal_2021_tc-maize-main_maize_classification.zip",
|
|
43
|
+
"WorldCereal_2021_tc-maize-second_irrigation_confidence.zip",
|
|
44
|
+
"WorldCereal_2021_tc-maize-second_irrigation_classification.zip",
|
|
45
|
+
"WorldCereal_2021_tc-maize-second_maize_confidence.zip",
|
|
46
|
+
"WorldCereal_2021_tc-maize-second_maize_classification.zip",
|
|
47
|
+
"WorldCereal_2021_tc-springcereals_springcereals_confidence.zip",
|
|
48
|
+
"WorldCereal_2021_tc-springcereals_springcereals_classification.zip",
|
|
49
|
+
"WorldCereal_2021_tc-wintercereals_irrigation_confidence.zip",
|
|
50
|
+
"WorldCereal_2021_tc-wintercereals_irrigation_classification.zip",
|
|
51
|
+
"WorldCereal_2021_tc-wintercereals_wintercereals_confidence.zip",
|
|
52
|
+
"WorldCereal_2021_tc-wintercereals_wintercereals_classification.zip",
|
|
53
|
+
]
|
|
54
|
+
TIMEOUT_SECONDS = 10
|
|
55
|
+
|
|
56
|
+
# this can be obtained using the following code:
|
|
57
|
+
# ```
|
|
58
|
+
# response = requests.get(cls.ZENODO_URL)
|
|
59
|
+
# response.raise_for_status()
|
|
60
|
+
# ZENODO_FILES_DATA = response.json()
|
|
61
|
+
# ```
|
|
62
|
+
# we hardcode it here because otherwise we get complaints from
|
|
63
|
+
# zenodo about repeatedly asking for it.
|
|
64
|
+
ZENODO_FILES_DATA: list[dict] = [
|
|
65
|
+
{
|
|
66
|
+
"id": "21551c80-0df9-4add-abaa-b66fff68179c",
|
|
67
|
+
"filename": "WorldCereal_2021_tc-annual_temporarycrops_classification.zip",
|
|
68
|
+
"filesize": 15500797967.0,
|
|
69
|
+
"checksum": "c006c34fca0253251a8d1ea73cf837a8",
|
|
70
|
+
"links": {
|
|
71
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/21551c80-0df9-4add-abaa-b66fff68179c",
|
|
72
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-annual_temporarycrops_classification.zip/content",
|
|
73
|
+
},
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
"id": "2fed6859-5729-4ab1-9d33-e15464c99a5b",
|
|
77
|
+
"filename": "WorldCereal_2021_tc-annual_temporarycrops_confidence.zip",
|
|
78
|
+
"filesize": 24969180828.0,
|
|
79
|
+
"checksum": "84a953be71292d02cceb6c64b2008ad7",
|
|
80
|
+
"links": {
|
|
81
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/2fed6859-5729-4ab1-9d33-e15464c99a5b",
|
|
82
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-annual_temporarycrops_confidence.zip/content",
|
|
83
|
+
},
|
|
84
|
+
},
|
|
85
|
+
{
|
|
86
|
+
"id": "2cab95a8-24d9-45cf-ac70-67fa4b6bda64",
|
|
87
|
+
"filename": "WorldCereal_2021_tc-maize-main_irrigation_classification.zip",
|
|
88
|
+
"filesize": 17247922829.0,
|
|
89
|
+
"checksum": "ceaf240dc4bba5e19491dd3c9893ae34",
|
|
90
|
+
"links": {
|
|
91
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/2cab95a8-24d9-45cf-ac70-67fa4b6bda64",
|
|
92
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-maize-main_irrigation_classification.zip/content",
|
|
93
|
+
},
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
"id": "54d63601-cda8-4f10-8710-a2068e697418",
|
|
97
|
+
"filename": "WorldCereal_2021_tc-maize-main_irrigation_confidence.zip",
|
|
98
|
+
"filesize": 11327157543.0,
|
|
99
|
+
"checksum": "c509ee2cb8b6fc44383788ffaa248950",
|
|
100
|
+
"links": {
|
|
101
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/54d63601-cda8-4f10-8710-a2068e697418",
|
|
102
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-maize-main_irrigation_confidence.zip/content",
|
|
103
|
+
},
|
|
104
|
+
},
|
|
105
|
+
{
|
|
106
|
+
"id": "b2278b6c-c2f5-49c1-8ebc-e828dbf8c27d",
|
|
107
|
+
"filename": "WorldCereal_2021_tc-maize-main_maize_classification.zip",
|
|
108
|
+
"filesize": 18210475632.0,
|
|
109
|
+
"checksum": "ff298db1b654b91fcfa27495d878932d",
|
|
110
|
+
"links": {
|
|
111
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/b2278b6c-c2f5-49c1-8ebc-e828dbf8c27d",
|
|
112
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-maize-main_maize_classification.zip/content",
|
|
113
|
+
},
|
|
114
|
+
},
|
|
115
|
+
{
|
|
116
|
+
"id": "277c0d06-b5ae-4748-bad1-c135084276ef",
|
|
117
|
+
"filename": "WorldCereal_2021_tc-maize-main_maize_confidence.zip",
|
|
118
|
+
"filesize": 10442831518.0,
|
|
119
|
+
"checksum": "0e6bb70209a83b526ec146e5e4ed3451",
|
|
120
|
+
"links": {
|
|
121
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/277c0d06-b5ae-4748-bad1-c135084276ef",
|
|
122
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-maize-main_maize_confidence.zip/content",
|
|
123
|
+
},
|
|
124
|
+
},
|
|
125
|
+
{
|
|
126
|
+
"id": "d9c5dbe4-d027-47aa-bb6e-806c9964f73e",
|
|
127
|
+
"filename": "WorldCereal_2021_tc-maize-second_irrigation_classification.zip",
|
|
128
|
+
"filesize": 6703649764.0,
|
|
129
|
+
"checksum": "7221b40181835c5226d357ae3fec434f",
|
|
130
|
+
"links": {
|
|
131
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/d9c5dbe4-d027-47aa-bb6e-806c9964f73e",
|
|
132
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-maize-second_irrigation_classification.zip/content",
|
|
133
|
+
},
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
"id": "f47baf24-27d9-4913-a483-ec86ae87e60a",
|
|
137
|
+
"filename": "WorldCereal_2021_tc-maize-second_irrigation_confidence.zip",
|
|
138
|
+
"filesize": 3813149175.0,
|
|
139
|
+
"checksum": "cb8b91155c8fcf38f869875f2cb35200",
|
|
140
|
+
"links": {
|
|
141
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/f47baf24-27d9-4913-a483-ec86ae87e60a",
|
|
142
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-maize-second_irrigation_confidence.zip/content",
|
|
143
|
+
},
|
|
144
|
+
},
|
|
145
|
+
{
|
|
146
|
+
"id": "93ae9f7f-f989-4fc5-837a-d27652b761f7",
|
|
147
|
+
"filename": "WorldCereal_2021_tc-maize-second_maize_classification.zip",
|
|
148
|
+
"filesize": 6917008439.0,
|
|
149
|
+
"checksum": "aa883b52451f878e6b4462d27410707e",
|
|
150
|
+
"links": {
|
|
151
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/93ae9f7f-f989-4fc5-837a-d27652b761f7",
|
|
152
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-maize-second_maize_classification.zip/content",
|
|
153
|
+
},
|
|
154
|
+
},
|
|
155
|
+
{
|
|
156
|
+
"id": "d3a0df02-8034-463f-a923-2bfe0c2719ac",
|
|
157
|
+
"filename": "WorldCereal_2021_tc-maize-second_maize_confidence.zip",
|
|
158
|
+
"filesize": 3752378387.0,
|
|
159
|
+
"checksum": "8a819762b7f3950839b0e832cb346e30",
|
|
160
|
+
"links": {
|
|
161
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/d3a0df02-8034-463f-a923-2bfe0c2719ac",
|
|
162
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-maize-second_maize_confidence.zip/content",
|
|
163
|
+
},
|
|
164
|
+
},
|
|
165
|
+
{
|
|
166
|
+
"id": "7a257437-89fe-4278-94fe-90a66e81e1bd",
|
|
167
|
+
"filename": "WorldCereal_2021_tc-springcereals_springcereals_classification.zip",
|
|
168
|
+
"filesize": 7008931281.0,
|
|
169
|
+
"checksum": "bb6e1124938e3a68b6e47d156f17bf86",
|
|
170
|
+
"links": {
|
|
171
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/7a257437-89fe-4278-94fe-90a66e81e1bd",
|
|
172
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-springcereals_springcereals_classification.zip/content",
|
|
173
|
+
},
|
|
174
|
+
},
|
|
175
|
+
{
|
|
176
|
+
"id": "a0b91677-f110-4df5-a5fd-7b1849895a02",
|
|
177
|
+
"filename": "WorldCereal_2021_tc-springcereals_springcereals_confidence.zip",
|
|
178
|
+
"filesize": 4708773375.0,
|
|
179
|
+
"checksum": "fd8dec8de691738df520c1ab451c7870",
|
|
180
|
+
"links": {
|
|
181
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/a0b91677-f110-4df5-a5fd-7b1849895a02",
|
|
182
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-springcereals_springcereals_confidence.zip/content",
|
|
183
|
+
},
|
|
184
|
+
},
|
|
185
|
+
{
|
|
186
|
+
"id": "a5774a05-ee8e-42df-bf06-68ebc6c14426",
|
|
187
|
+
"filename": "WorldCereal_2021_tc-wintercereals_activecropland_classification.zip",
|
|
188
|
+
"filesize": 20001277863.0,
|
|
189
|
+
"checksum": "3933653452a2e0b821c35091b6f4a035",
|
|
190
|
+
"links": {
|
|
191
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/a5774a05-ee8e-42df-bf06-68ebc6c14426",
|
|
192
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-wintercereals_activecropland_classification.zip/content",
|
|
193
|
+
},
|
|
194
|
+
},
|
|
195
|
+
{
|
|
196
|
+
"id": "5a4adaa6-e50a-469a-b401-6ccca02de443",
|
|
197
|
+
"filename": "WorldCereal_2021_tc-wintercereals_irrigation_classification.zip",
|
|
198
|
+
"filesize": 18019534510.0,
|
|
199
|
+
"checksum": "5032b11cf380d8cef07767e86ef4ee54",
|
|
200
|
+
"links": {
|
|
201
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/5a4adaa6-e50a-469a-b401-6ccca02de443",
|
|
202
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-wintercereals_irrigation_classification.zip/content",
|
|
203
|
+
},
|
|
204
|
+
},
|
|
205
|
+
{
|
|
206
|
+
"id": "23301576-64d2-48a1-9b19-0c126158c24d",
|
|
207
|
+
"filename": "WorldCereal_2021_tc-wintercereals_irrigation_confidence.zip",
|
|
208
|
+
"filesize": 11447731232.0,
|
|
209
|
+
"checksum": "f84c4088ac42bb67f308be50159ca778",
|
|
210
|
+
"links": {
|
|
211
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/23301576-64d2-48a1-9b19-0c126158c24d",
|
|
212
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-wintercereals_irrigation_confidence.zip/content",
|
|
213
|
+
},
|
|
214
|
+
},
|
|
215
|
+
{
|
|
216
|
+
"id": "9ab67c40-9072-44dc-8f6b-892fcaa3c079",
|
|
217
|
+
"filename": "WorldCereal_2021_tc-wintercereals_wintercereals_classification.zip",
|
|
218
|
+
"filesize": 18523882137.0,
|
|
219
|
+
"checksum": "386ce3fca8ba5577e2b62d6f3ea45b27",
|
|
220
|
+
"links": {
|
|
221
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/9ab67c40-9072-44dc-8f6b-892fcaa3c079",
|
|
222
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-wintercereals_wintercereals_classification.zip/content",
|
|
223
|
+
},
|
|
224
|
+
},
|
|
225
|
+
{
|
|
226
|
+
"id": "b4ce9cc1-a745-450a-b2e9-c4fb08059a93",
|
|
227
|
+
"filename": "WorldCereal_2021_tc-wintercereals_wintercereals_confidence.zip",
|
|
228
|
+
"filesize": 10174751452.0,
|
|
229
|
+
"checksum": "5870da83aaa4b3761cad3750feb73e43",
|
|
230
|
+
"links": {
|
|
231
|
+
"self": "https://zenodo.org/api/deposit/depositions/7875105/files/b4ce9cc1-a745-450a-b2e9-c4fb08059a93",
|
|
232
|
+
"download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-wintercereals_wintercereals_confidence.zip/content",
|
|
233
|
+
},
|
|
234
|
+
},
|
|
235
|
+
]
|
|
236
|
+
|
|
237
|
+
def __init__(
    self,
    worldcereal_dir: str,
    band: str | None = None,
    context: DataSourceContext = DataSourceContext(),
) -> None:
    """Create a new WorldCereal.

    The data for the selected band is downloaded and extracted (see
    download_worldcereal_data) before the LocalFiles base class is initialized
    with one raster item spec per AEZ found among the extracted GeoTIFFs.

    Args:
        worldcereal_dir: the directory to extract the WorldCereal GeoTIFF files. For
            high performance, this should be a local directory; if the dataset is
            remote, prefix with a protocol ("file://") to use a local directory
            instead of a path relative to the dataset path.
        band: the worldcereal band to process. This will only be used if the layer
            config is missing from the context.
        context: the data source context.

    Raises:
        ValueError: if the layer config does not have exactly one band set with
            exactly one band, if neither a layer config nor a band is given, or
            if no AEZ files are found for the band.
    """
    # Resolve the directory relative to the dataset path when one is available.
    if context.ds_path is None:
        worldcereal_upath = UPath(worldcereal_dir)
    else:
        worldcereal_upath = join_upath(context.ds_path, worldcereal_dir)

    # The layer config, when present, takes precedence over the band argument.
    if context.layer_config is None:
        if band is None:
            raise ValueError("band must be set if layer config is not in the context")
        self.band = band
    else:
        if len(context.layer_config.band_sets) != 1:
            raise ValueError("expected a single band set")
        if len(context.layer_config.band_sets[0].bands) != 1:
            raise ValueError("expected band set to have a single band")
        self.band = context.layer_config.band_sets[0].bands[0]

    tif_dir, tif_filepath = self.download_worldcereal_data(
        self.band, worldcereal_upath
    )
    discovered_aezs: set[int] = self.all_aezs_from_tifs(tif_filepath)

    # Build one item spec per AEZ, each pointing at that AEZ's GeoTIFF.
    item_specs: list[RasterItemSpec] = []
    for aez in discovered_aezs:
        # The name must be a str since it is later joined onto a posix path.
        spec = RasterItemSpec(fnames=[], bands=[], name=str(aez))
        tif_for_aez = self.filepath_for_product_aez(tif_filepath, aez)
        if tif_for_aez is not None:
            spec.fnames.append(tif_for_aez.absolute().as_uri())
            assert spec.bands is not None
            spec.bands.append([self.band])
        item_specs.append(spec)
    if len(item_specs) == 0:
        raise ValueError(f"No AEZ files found for {self.band}")

    super().__init__(
        src_dir=tif_dir,
        raster_item_specs=item_specs,
        layer_type=LayerType.RASTER,
        context=context,
    )
|
|
299
|
+
|
|
300
|
+
@staticmethod
def band_from_zipfilename(filename: str) -> str:
    """Derive the band name from a WorldCereal zip filename.

    After the last four characters (".zip") are dropped, the filename must
    consist of exactly five "_"-separated fields; the last three (season,
    product, and confidence/classification) make up the band name.

    Args:
        filename: the zip filename.

    Returns:
        the last three fields joined with "-".

    Raises:
        ValueError: if the filename does not split into exactly five fields.
    """
    # Drop the trailing ".zip" before splitting into fields.
    stem = filename[:-4]
    _prefix, _year, season, product, kind = stem.split("_")
    # Band names must not contain '_', so the fields are joined with '-'.
    return f"{season}-{product}-{kind}"
|
|
307
|
+
|
|
308
|
+
@staticmethod
def zip_filepath_from_filename(filename: str) -> str:
    """Return the relative directory that holds a zip file's extracted tifs.

    Args:
        filename: the zip filename; after dropping the last four characters
            (".zip") it must consist of exactly five "_"-separated fields.

    Returns:
        a "/"-separated relative path built from the season, product, and
        confidence/classification fields of the filename.

    Raises:
        ValueError: if the filename does not split into exactly five fields.
    """
    # Drop the trailing ".zip" before splitting into fields.
    _prefix, _year, season, product, kind = filename[:-4].split("_")
    # Confidence rasters are placed under "aez_downsampled", all others under "aez".
    aez_name = "aez_downsampled" if kind == "confidence" else "aez"
    return "/".join(
        ["data/worldcereal_data/MAP-v3/2021", season, product, aez_name, kind]
    )
|
|
320
|
+
|
|
321
|
+
@staticmethod
def all_aezs_from_tifs(filepath: UPath) -> set[int]:
    """Collect the AEZ ids of all tif files directly inside a directory.

    The AEZ id is the part of each "*.tif" filename before the first "_",
    parsed as an integer.

    Args:
        filepath: the directory containing the tif files.

    Returns:
        the set of AEZ ids; empty if the directory has no tif files.

    Raises:
        ValueError: if a tif filename does not start with an integer field.
    """
    return {int(tif.name.partition("_")[0]) for tif in filepath.glob("*.tif")}
|
|
329
|
+
|
|
330
|
+
@staticmethod
def filepath_for_product_aez(path_to_tifs: UPath, aez: int) -> UPath | None:
    """Find the tif file for one AEZ within a band's tif directory.

    Args:
        path_to_tifs: the directory containing the tifs for a band.
        aez: the AEZ id; files are matched with the pattern "{aez}_*.tif".

    Returns:
        the single matching tif file, or None if there is no match.

    Raises:
        ValueError: if more than one tif matches the AEZ.
    """
    matches = list(path_to_tifs.glob(f"{aez}_*.tif"))
    if len(matches) > 1:
        raise ValueError(f"Got more than one tif for {aez} in {path_to_tifs}")
    return matches[0] if matches else None
|
|
339
|
+
|
|
340
|
+
@classmethod
def download_worldcereal_data(
    cls, band: str, worldcereal_dir: UPath
) -> tuple[UPath, UPath]:
    """Download and extract the WorldCereal archive for one band.

    The zip file is stored under ``worldcereal_dir / "zips"`` and extracted
    under ``worldcereal_dir / "tifs"``. A zip file that already exists is not
    downloaded again, and extraction is skipped when the
    ``.extraction_complete`` marker for the zip file exists.

    Args:
        band: the worldcereal band to download.
        worldcereal_dir: the directory to download to.

    Returns:
        a tuple (tif_dir, tif_filepath). tif_dir is the root directory that the
        archive is extracted into. The archive is nested, so tif_filepath is
        the sub-directory of tif_dir (see zip_filepath_from_filename) that is
        expected to contain the GeoTIFFs for this band.

    Raises:
        ValueError: if the band does not match exactly one entry of
            ZENODO_FILES_DATA, or the matching file is not in ZIP_FILENAMES.
        requests.HTTPError: if the download request returns an error status.
    """
    # Download the zip file (if it doesn't already exist).
    zip_dir = worldcereal_dir / "zips"
    zip_dir.mkdir(parents=True, exist_ok=True)
    logger.debug("Worldcereal zipfile: %s", zip_dir)

    # Look the band up in the hardcoded Zenodo file listing;
    # entry["filename"] maps to the ZIP_FILENAMES.
    files_to_download = [
        entry
        for entry in cls.ZENODO_FILES_DATA
        if cls.band_from_zipfilename(entry["filename"]) == band
    ]
    if len(files_to_download) != 1:
        raise ValueError(
            f"Got != 1 suitable filenames for {band}: {[f['filename'] for f in files_to_download]}"
        )
    file_to_download = files_to_download[0]
    filename: str = file_to_download["filename"]
    if filename not in cls.ZIP_FILENAMES:
        raise ValueError(f"Unsupported filename {filename} for band {band}")
    file_url = file_to_download["links"]["download"]

    zip_filepath = zip_dir / filename
    if not zip_filepath.exists():
        # Stream the archive in chunks. This is a plain GET, so an interrupted
        # download starts over rather than resuming.
        logger.debug("Downloading %s to %s", file_url, zip_filepath)
        with requests.get(file_url, stream=True, timeout=cls.TIMEOUT_SECONDS) as r:
            r.raise_for_status()
            with open_atomic(zip_filepath, "wb") as dst_zip:
                for chunk in r.iter_content(chunk_size=8192):
                    dst_zip.write(chunk)

    # Extract the zip file.
    # We use a .extraction_complete file to indicate that the extraction is done.
    tif_dir = worldcereal_dir / "tifs"
    tif_dir.mkdir(parents=True, exist_ok=True)

    completed_fname = zip_dir / (filename + ".extraction_complete")
    if completed_fname.exists():
        logger.debug("%s has already been extracted", filename)
    else:
        logger.debug("extracting %s to %s", filename, tif_dir)

        with get_upath_local(zip_filepath) as local_fname:
            with zipfile.ZipFile(local_fname) as zip_f:
                if isinstance(tif_dir.fs, LocalFileSystem):
                    zip_f.extractall(tif_dir.path)
                else:
                    # If the tif_dir is remote, extract to a temporary local
                    # directory first and then copy it over. The context
                    # manager removes the temporary directory afterwards.
                    with tempfile.TemporaryDirectory() as tmp_dir:
                        zip_f.extractall(tmp_dir)
                        # The archive is nested, so walk the whole tree and
                        # keep each file's path relative to the archive root.
                        for root, _dirnames, fnames in os.walk(tmp_dir):
                            for fname in fnames:
                                local_path = os.path.join(root, fname)
                                rel_path = os.path.relpath(local_path, tmp_dir)
                                dst_path = tif_dir / rel_path.replace(os.sep, "/")
                                dst_path.parent.mkdir(parents=True, exist_ok=True)
                                with open(local_path, "rb") as src:
                                    with dst_path.open("wb") as dst:
                                        shutil.copyfileobj(src, dst)

        # Mark the extraction complete.
        completed_fname.touch()

    tif_filepath = tif_dir / cls.zip_filepath_from_filename(filename)

    return tif_dir, tif_filepath
|
|
426
|
+
|
|
427
|
+
@functools.cache
def list_items(self) -> list[Item]:
    """Lists items from the source directory while maintaining a cache file.

    This is identical to LocalFiles.list_items except that a unique summary
    is made per band (since we treat each band separately now.)

    The summary is stored as JSON at ``src_dir / "{band}_summary.json"``. If
    it exists, the items are deserialized from it; otherwise the items are
    listed through the importer and the summary is written.

    Returns:
        the list of items.
    """
    cache_fname = self.src_dir / f"{self.band}_summary.json"
    if cache_fname.exists():
        logger.debug("loading item list from cache at %s", cache_fname)
        with cache_fname.open() as f:
            serialized_items = json.load(f)
        return [
            self.deserialize_item(serialized_item)
            for serialized_item in serialized_items
        ]

    logger.debug("cache at %s does not exist, listing items", cache_fname)
    items = self.importer.list_items(self.src_dir)
    serialized_items = [item.serialize() for item in items]
    # Write through open_atomic (as the zip download in this class does) so an
    # interrupted write is less likely to leave a truncated summary that later
    # calls would load as a valid cache.
    with open_atomic(cache_fname, "w") as f:
        json.dump(serialized_items, f)
    return items
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""Data source for ESA WorldCover 2021."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
import tempfile
|
|
6
|
+
import zipfile
|
|
7
|
+
|
|
8
|
+
import requests
|
|
9
|
+
from fsspec.implementations.local import LocalFileSystem
|
|
10
|
+
from upath import UPath
|
|
11
|
+
|
|
12
|
+
from rslearn.config import LayerType
|
|
13
|
+
from rslearn.data_sources import DataSourceContext
|
|
14
|
+
from rslearn.data_sources.local_files import LocalFiles
|
|
15
|
+
from rslearn.log_utils import get_logger
|
|
16
|
+
from rslearn.utils.fsspec import get_upath_local, join_upath, open_atomic
|
|
17
|
+
|
|
18
|
+
logger = get_logger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class WorldCover(LocalFiles):
    """A data source for the ESA WorldCover 2021 land cover map.

    For details about the land cover map, see https://worldcover2021.esa.int/.

    This data source downloads the 18 zip files that contain the map. They are then
    extracted, yielding 2,651 GeoTIFF files. These are then used with
    rslearn.data_sources.local_files.LocalFiles to implement the data source.

    The layer configuration should specify a single band called B1 which will
    contain the land cover class.
    """

    BASE_URL = "https://worldcover2021.esa.int/data/archive/"
    ZIP_FILENAMES = [
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_N30E000.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_N30E060.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_N30E120.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_N30W060.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_N30W120.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_N30W180.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S30E000.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S30E060.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S30E120.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S30W060.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S30W120.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S30W180.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S90E000.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S90E060.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S90E120.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S90W060.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S90W120.zip",
        "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S90W180.zip",
    ]
    # Timeout (in seconds) passed to requests.get for each zip download.
    TIMEOUT_SECONDS = 10

    def __init__(
        self,
        worldcover_dir: str,
        context: DataSourceContext = DataSourceContext(),
    ) -> None:
        """Create a new WorldCover.

        The WorldCover archives are downloaded and extracted (if not already
        present) before the LocalFiles base class is initialized on the
        resulting GeoTIFF directory.

        Args:
            worldcover_dir: the directory to extract the WorldCover GeoTIFF files. For
                high performance, this should be a local directory; if the dataset is
                remote, prefix with a protocol ("file://") to use a local directory
                instead of a path relative to the dataset path.
            context: the data source context.
        """
        if context.ds_path is not None:
            worldcover_upath = join_upath(context.ds_path, worldcover_dir)
        else:
            worldcover_upath = UPath(worldcover_dir)

        tif_dir = self.download_worldcover_data(worldcover_upath)

        super().__init__(
            src_dir=tif_dir,
            layer_type=LayerType.RASTER,
            context=context,
        )

    def download_worldcover_data(self, worldcover_dir: UPath) -> UPath:
        """Download and extract the WorldCover data.

        If the data was previously downloaded, this function returns quickly.

        Args:
            worldcover_dir: the directory to download to.

        Returns:
            the sub-directory containing GeoTIFFs

        Raises:
            requests.HTTPError: if a download responds with an error status
                (raised by raise_for_status).
        """
        # Download the zip files (if they don't already exist).
        zip_dir = worldcover_dir / "zips"
        zip_dir.mkdir(parents=True, exist_ok=True)
        for fname in self.ZIP_FILENAMES:
            src_url = self.BASE_URL + fname
            dst_fname = zip_dir / fname
            if dst_fname.exists():
                logger.debug("%s has already been downloaded at %s", fname, dst_fname)
                continue
            logger.info("downloading %s to %s", src_url, dst_fname)
            with requests.get(src_url, stream=True, timeout=self.TIMEOUT_SECONDS) as r:
                r.raise_for_status()
                with open_atomic(dst_fname, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)

        # Extract the zip files.
        # We use a .extraction_complete file to indicate that the extraction is done.
        tif_dir = worldcover_dir / "tifs"
        tif_dir.mkdir(parents=True, exist_ok=True)
        for fname in self.ZIP_FILENAMES:
            zip_fname = zip_dir / fname
            completed_fname = zip_dir / (fname + ".extraction_complete")
            if completed_fname.exists():
                logger.debug("%s has already been extracted", fname)
                continue
            logger.info("extracting %s to %s", fname, tif_dir)

            self._extract_zip(zip_fname, tif_dir)

            # Mark the extraction complete. This only happens after every file
            # has been written, so an interrupted extraction is retried.
            completed_fname.touch()

        return tif_dir

    def _extract_zip(self, zip_fname: UPath, tif_dir: UPath) -> None:
        """Extract a single zip archive into tif_dir.

        Args:
            zip_fname: path to the zip archive to extract.
            tif_dir: the directory that should receive the archive contents.
        """
        if isinstance(tif_dir.fs, LocalFileSystem):
            # Local destination: extract directly into it.
            with get_upath_local(zip_fname) as local_fname:
                with zipfile.ZipFile(local_fname) as zip_f:
                    zip_f.extractall(tif_dir.path)
            return

        # If the tif_dir is remote, we need to extract to a temporary local
        # directory first and then copy it over. The context manager removes
        # the temporary directory even if extraction or copying fails.
        with tempfile.TemporaryDirectory() as tmp_dir:
            with get_upath_local(zip_fname) as local_fname:
                with zipfile.ZipFile(local_fname) as zip_f:
                    zip_f.extractall(tmp_dir)

            # NOTE(review): assumes the archive is flat (top-level entries are
            # files, not directories) -- confirm against the actual zips.
            for extracted_name in os.listdir(tmp_dir):
                with open(os.path.join(tmp_dir, extracted_name), "rb") as src:
                    with (tif_dir / extracted_name).open("wb") as dst:
                        shutil.copyfileobj(src, dst)
|