rslearn 0.0.1__py3-none-any.whl → 0.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. rslearn/arg_parser.py +31 -0
  2. rslearn/config/__init__.py +6 -12
  3. rslearn/config/dataset.py +520 -401
  4. rslearn/const.py +9 -15
  5. rslearn/data_sources/__init__.py +8 -23
  6. rslearn/data_sources/aws_landsat.py +242 -98
  7. rslearn/data_sources/aws_open_data.py +111 -151
  8. rslearn/data_sources/aws_sentinel1.py +131 -0
  9. rslearn/data_sources/climate_data_store.py +471 -0
  10. rslearn/data_sources/copernicus.py +884 -12
  11. rslearn/data_sources/data_source.py +43 -12
  12. rslearn/data_sources/earthdaily.py +484 -0
  13. rslearn/data_sources/earthdata_srtm.py +282 -0
  14. rslearn/data_sources/eurocrops.py +242 -0
  15. rslearn/data_sources/gcp_public_data.py +578 -222
  16. rslearn/data_sources/google_earth_engine.py +461 -135
  17. rslearn/data_sources/local_files.py +219 -150
  18. rslearn/data_sources/openstreetmap.py +51 -89
  19. rslearn/data_sources/planet.py +24 -60
  20. rslearn/data_sources/planet_basemap.py +275 -0
  21. rslearn/data_sources/planetary_computer.py +798 -0
  22. rslearn/data_sources/usda_cdl.py +195 -0
  23. rslearn/data_sources/usgs_landsat.py +115 -83
  24. rslearn/data_sources/utils.py +249 -61
  25. rslearn/data_sources/vector_source.py +1 -0
  26. rslearn/data_sources/worldcereal.py +449 -0
  27. rslearn/data_sources/worldcover.py +144 -0
  28. rslearn/data_sources/worldpop.py +153 -0
  29. rslearn/data_sources/xyz_tiles.py +150 -107
  30. rslearn/dataset/__init__.py +8 -2
  31. rslearn/dataset/add_windows.py +2 -2
  32. rslearn/dataset/dataset.py +40 -51
  33. rslearn/dataset/handler_summaries.py +131 -0
  34. rslearn/dataset/manage.py +313 -74
  35. rslearn/dataset/materialize.py +431 -107
  36. rslearn/dataset/remap.py +29 -4
  37. rslearn/dataset/storage/__init__.py +1 -0
  38. rslearn/dataset/storage/file.py +202 -0
  39. rslearn/dataset/storage/storage.py +140 -0
  40. rslearn/dataset/window.py +181 -44
  41. rslearn/lightning_cli.py +454 -0
  42. rslearn/log_utils.py +24 -0
  43. rslearn/main.py +384 -181
  44. rslearn/models/anysat.py +215 -0
  45. rslearn/models/attention_pooling.py +177 -0
  46. rslearn/models/clay/clay.py +231 -0
  47. rslearn/models/clay/configs/metadata.yaml +295 -0
  48. rslearn/models/clip.py +68 -0
  49. rslearn/models/component.py +111 -0
  50. rslearn/models/concatenate_features.py +103 -0
  51. rslearn/models/conv.py +63 -0
  52. rslearn/models/croma.py +306 -0
  53. rslearn/models/detr/__init__.py +5 -0
  54. rslearn/models/detr/box_ops.py +103 -0
  55. rslearn/models/detr/detr.py +504 -0
  56. rslearn/models/detr/matcher.py +107 -0
  57. rslearn/models/detr/position_encoding.py +114 -0
  58. rslearn/models/detr/transformer.py +429 -0
  59. rslearn/models/detr/util.py +24 -0
  60. rslearn/models/dinov3.py +177 -0
  61. rslearn/models/faster_rcnn.py +30 -28
  62. rslearn/models/feature_center_crop.py +53 -0
  63. rslearn/models/fpn.py +19 -8
  64. rslearn/models/galileo/__init__.py +5 -0
  65. rslearn/models/galileo/galileo.py +595 -0
  66. rslearn/models/galileo/single_file_galileo.py +1678 -0
  67. rslearn/models/module_wrapper.py +65 -0
  68. rslearn/models/molmo.py +69 -0
  69. rslearn/models/multitask.py +384 -28
  70. rslearn/models/olmoearth_pretrain/__init__.py +1 -0
  71. rslearn/models/olmoearth_pretrain/model.py +421 -0
  72. rslearn/models/olmoearth_pretrain/norm.py +86 -0
  73. rslearn/models/panopticon.py +170 -0
  74. rslearn/models/panopticon_data/sensors/drone.yaml +32 -0
  75. rslearn/models/panopticon_data/sensors/enmap.yaml +904 -0
  76. rslearn/models/panopticon_data/sensors/goes.yaml +9 -0
  77. rslearn/models/panopticon_data/sensors/himawari.yaml +9 -0
  78. rslearn/models/panopticon_data/sensors/intuition.yaml +606 -0
  79. rslearn/models/panopticon_data/sensors/landsat8.yaml +84 -0
  80. rslearn/models/panopticon_data/sensors/modis_terra.yaml +99 -0
  81. rslearn/models/panopticon_data/sensors/qb2_ge1.yaml +34 -0
  82. rslearn/models/panopticon_data/sensors/sentinel1.yaml +85 -0
  83. rslearn/models/panopticon_data/sensors/sentinel2.yaml +97 -0
  84. rslearn/models/panopticon_data/sensors/superdove.yaml +60 -0
  85. rslearn/models/panopticon_data/sensors/wv23.yaml +63 -0
  86. rslearn/models/pick_features.py +17 -10
  87. rslearn/models/pooling_decoder.py +60 -7
  88. rslearn/models/presto/__init__.py +5 -0
  89. rslearn/models/presto/presto.py +297 -0
  90. rslearn/models/presto/single_file_presto.py +926 -0
  91. rslearn/models/prithvi.py +1147 -0
  92. rslearn/models/resize_features.py +59 -0
  93. rslearn/models/sam2_enc.py +13 -9
  94. rslearn/models/satlaspretrain.py +38 -18
  95. rslearn/models/simple_time_series.py +188 -77
  96. rslearn/models/singletask.py +24 -13
  97. rslearn/models/ssl4eo_s12.py +40 -30
  98. rslearn/models/swin.py +44 -32
  99. rslearn/models/task_embedding.py +250 -0
  100. rslearn/models/terramind.py +256 -0
  101. rslearn/models/trunk.py +139 -0
  102. rslearn/models/unet.py +68 -22
  103. rslearn/models/upsample.py +48 -0
  104. rslearn/models/use_croma.py +508 -0
  105. rslearn/template_params.py +26 -0
  106. rslearn/tile_stores/__init__.py +41 -18
  107. rslearn/tile_stores/default.py +409 -0
  108. rslearn/tile_stores/tile_store.py +236 -132
  109. rslearn/train/all_patches_dataset.py +530 -0
  110. rslearn/train/callbacks/adapters.py +53 -0
  111. rslearn/train/callbacks/freeze_unfreeze.py +348 -17
  112. rslearn/train/callbacks/gradients.py +129 -0
  113. rslearn/train/callbacks/peft.py +116 -0
  114. rslearn/train/data_module.py +444 -20
  115. rslearn/train/dataset.py +588 -235
  116. rslearn/train/lightning_module.py +192 -62
  117. rslearn/train/model_context.py +88 -0
  118. rslearn/train/optimizer.py +31 -0
  119. rslearn/train/prediction_writer.py +319 -84
  120. rslearn/train/scheduler.py +92 -0
  121. rslearn/train/tasks/classification.py +55 -28
  122. rslearn/train/tasks/detection.py +132 -76
  123. rslearn/train/tasks/embedding.py +120 -0
  124. rslearn/train/tasks/multi_task.py +28 -14
  125. rslearn/train/tasks/per_pixel_regression.py +291 -0
  126. rslearn/train/tasks/regression.py +161 -44
  127. rslearn/train/tasks/segmentation.py +428 -53
  128. rslearn/train/tasks/task.py +6 -5
  129. rslearn/train/transforms/__init__.py +1 -1
  130. rslearn/train/transforms/concatenate.py +54 -10
  131. rslearn/train/transforms/crop.py +29 -11
  132. rslearn/train/transforms/flip.py +18 -6
  133. rslearn/train/transforms/mask.py +78 -0
  134. rslearn/train/transforms/normalize.py +101 -17
  135. rslearn/train/transforms/pad.py +19 -7
  136. rslearn/train/transforms/resize.py +83 -0
  137. rslearn/train/transforms/select_bands.py +76 -0
  138. rslearn/train/transforms/sentinel1.py +75 -0
  139. rslearn/train/transforms/transform.py +89 -70
  140. rslearn/utils/__init__.py +2 -6
  141. rslearn/utils/array.py +8 -6
  142. rslearn/utils/feature.py +2 -2
  143. rslearn/utils/fsspec.py +90 -1
  144. rslearn/utils/geometry.py +347 -7
  145. rslearn/utils/get_utm_ups_crs.py +2 -3
  146. rslearn/utils/grid_index.py +5 -5
  147. rslearn/utils/jsonargparse.py +178 -0
  148. rslearn/utils/mp.py +4 -3
  149. rslearn/utils/raster_format.py +268 -116
  150. rslearn/utils/rtree_index.py +64 -17
  151. rslearn/utils/sqlite_index.py +7 -1
  152. rslearn/utils/vector_format.py +252 -97
  153. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/METADATA +532 -283
  154. rslearn-0.0.21.dist-info/RECORD +167 -0
  155. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/WHEEL +1 -1
  156. rslearn-0.0.21.dist-info/licenses/NOTICE +115 -0
  157. rslearn/data_sources/raster_source.py +0 -309
  158. rslearn/models/registry.py +0 -5
  159. rslearn/tile_stores/file.py +0 -242
  160. rslearn/utils/mgrs.py +0 -24
  161. rslearn/utils/utils.py +0 -22
  162. rslearn-0.0.1.dist-info/RECORD +0 -88
  163. /rslearn/{data_sources/geotiff.py → py.typed} +0 -0
  164. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/entry_points.txt +0 -0
  165. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info/licenses}/LICENSE +0 -0
  166. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,449 @@
1
+ """Data source for ESA WorldCereal 2021."""
2
+
3
+ import functools
4
+ import json
5
+ import os
6
+ import shutil
7
+ import tempfile
8
+ import zipfile
9
+
10
+ import requests
11
+ from fsspec.implementations.local import LocalFileSystem
12
+ from upath import UPath
13
+
14
+ from rslearn.config import LayerType
15
+ from rslearn.data_sources.local_files import LocalFiles, RasterItemSpec
16
+ from rslearn.log_utils import get_logger
17
+ from rslearn.utils.fsspec import get_upath_local, join_upath, open_atomic
18
+
19
+ from .data_source import DataSourceContext, Item
20
+
21
+ logger = get_logger(__name__)
22
+
23
+
24
+ class WorldCereal(LocalFiles):
25
+ """A data source for the ESA WorldCereal 2021 agricultural land cover map.
26
+
27
+ For details about the land cover map, see https://esa-worldcereal.org/en.
28
+ """
29
+
30
+ ZENODO_RECORD_ID = 7875105
31
+ ZENODO_URL = f"https://zenodo.org/api/deposit/depositions/{ZENODO_RECORD_ID}/files"
32
+
33
+ # these are the subset of filenames we want to download, which contain the
34
+ # model confidence and classification values. This defines the order of the
35
+ # bands in the final output tif files
36
+ ZIP_FILENAMES = [
37
+ "WorldCereal_2021_tc-annual_temporarycrops_confidence.zip",
38
+ "WorldCereal_2021_tc-annual_temporarycrops_classification.zip",
39
+ "WorldCereal_2021_tc-maize-main_irrigation_confidence.zip",
40
+ "WorldCereal_2021_tc-maize-main_irrigation_classification.zip",
41
+ "WorldCereal_2021_tc-maize-main_maize_confidence.zip",
42
+ "WorldCereal_2021_tc-maize-main_maize_classification.zip",
43
+ "WorldCereal_2021_tc-maize-second_irrigation_confidence.zip",
44
+ "WorldCereal_2021_tc-maize-second_irrigation_classification.zip",
45
+ "WorldCereal_2021_tc-maize-second_maize_confidence.zip",
46
+ "WorldCereal_2021_tc-maize-second_maize_classification.zip",
47
+ "WorldCereal_2021_tc-springcereals_springcereals_confidence.zip",
48
+ "WorldCereal_2021_tc-springcereals_springcereals_classification.zip",
49
+ "WorldCereal_2021_tc-wintercereals_irrigation_confidence.zip",
50
+ "WorldCereal_2021_tc-wintercereals_irrigation_classification.zip",
51
+ "WorldCereal_2021_tc-wintercereals_wintercereals_confidence.zip",
52
+ "WorldCereal_2021_tc-wintercereals_wintercereals_classification.zip",
53
+ ]
54
+ TIMEOUT_SECONDS = 10
55
+
56
+ # this can be obtained using the following code:
57
+ # ```
58
+ # response = requests.get(cls.ZENODO_URL)
59
+ # response.raise_for_status()
60
+ # ZENODO_FILES_DATA = response.json()
61
+ # ```
62
+ # we hardcode it here because otherwise we get complaints from
63
+ # zenodo about repeatedly asking for it.
64
+ ZENODO_FILES_DATA: list[dict] = [
65
+ {
66
+ "id": "21551c80-0df9-4add-abaa-b66fff68179c",
67
+ "filename": "WorldCereal_2021_tc-annual_temporarycrops_classification.zip",
68
+ "filesize": 15500797967.0,
69
+ "checksum": "c006c34fca0253251a8d1ea73cf837a8",
70
+ "links": {
71
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/21551c80-0df9-4add-abaa-b66fff68179c",
72
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-annual_temporarycrops_classification.zip/content",
73
+ },
74
+ },
75
+ {
76
+ "id": "2fed6859-5729-4ab1-9d33-e15464c99a5b",
77
+ "filename": "WorldCereal_2021_tc-annual_temporarycrops_confidence.zip",
78
+ "filesize": 24969180828.0,
79
+ "checksum": "84a953be71292d02cceb6c64b2008ad7",
80
+ "links": {
81
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/2fed6859-5729-4ab1-9d33-e15464c99a5b",
82
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-annual_temporarycrops_confidence.zip/content",
83
+ },
84
+ },
85
+ {
86
+ "id": "2cab95a8-24d9-45cf-ac70-67fa4b6bda64",
87
+ "filename": "WorldCereal_2021_tc-maize-main_irrigation_classification.zip",
88
+ "filesize": 17247922829.0,
89
+ "checksum": "ceaf240dc4bba5e19491dd3c9893ae34",
90
+ "links": {
91
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/2cab95a8-24d9-45cf-ac70-67fa4b6bda64",
92
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-maize-main_irrigation_classification.zip/content",
93
+ },
94
+ },
95
+ {
96
+ "id": "54d63601-cda8-4f10-8710-a2068e697418",
97
+ "filename": "WorldCereal_2021_tc-maize-main_irrigation_confidence.zip",
98
+ "filesize": 11327157543.0,
99
+ "checksum": "c509ee2cb8b6fc44383788ffaa248950",
100
+ "links": {
101
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/54d63601-cda8-4f10-8710-a2068e697418",
102
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-maize-main_irrigation_confidence.zip/content",
103
+ },
104
+ },
105
+ {
106
+ "id": "b2278b6c-c2f5-49c1-8ebc-e828dbf8c27d",
107
+ "filename": "WorldCereal_2021_tc-maize-main_maize_classification.zip",
108
+ "filesize": 18210475632.0,
109
+ "checksum": "ff298db1b654b91fcfa27495d878932d",
110
+ "links": {
111
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/b2278b6c-c2f5-49c1-8ebc-e828dbf8c27d",
112
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-maize-main_maize_classification.zip/content",
113
+ },
114
+ },
115
+ {
116
+ "id": "277c0d06-b5ae-4748-bad1-c135084276ef",
117
+ "filename": "WorldCereal_2021_tc-maize-main_maize_confidence.zip",
118
+ "filesize": 10442831518.0,
119
+ "checksum": "0e6bb70209a83b526ec146e5e4ed3451",
120
+ "links": {
121
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/277c0d06-b5ae-4748-bad1-c135084276ef",
122
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-maize-main_maize_confidence.zip/content",
123
+ },
124
+ },
125
+ {
126
+ "id": "d9c5dbe4-d027-47aa-bb6e-806c9964f73e",
127
+ "filename": "WorldCereal_2021_tc-maize-second_irrigation_classification.zip",
128
+ "filesize": 6703649764.0,
129
+ "checksum": "7221b40181835c5226d357ae3fec434f",
130
+ "links": {
131
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/d9c5dbe4-d027-47aa-bb6e-806c9964f73e",
132
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-maize-second_irrigation_classification.zip/content",
133
+ },
134
+ },
135
+ {
136
+ "id": "f47baf24-27d9-4913-a483-ec86ae87e60a",
137
+ "filename": "WorldCereal_2021_tc-maize-second_irrigation_confidence.zip",
138
+ "filesize": 3813149175.0,
139
+ "checksum": "cb8b91155c8fcf38f869875f2cb35200",
140
+ "links": {
141
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/f47baf24-27d9-4913-a483-ec86ae87e60a",
142
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-maize-second_irrigation_confidence.zip/content",
143
+ },
144
+ },
145
+ {
146
+ "id": "93ae9f7f-f989-4fc5-837a-d27652b761f7",
147
+ "filename": "WorldCereal_2021_tc-maize-second_maize_classification.zip",
148
+ "filesize": 6917008439.0,
149
+ "checksum": "aa883b52451f878e6b4462d27410707e",
150
+ "links": {
151
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/93ae9f7f-f989-4fc5-837a-d27652b761f7",
152
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-maize-second_maize_classification.zip/content",
153
+ },
154
+ },
155
+ {
156
+ "id": "d3a0df02-8034-463f-a923-2bfe0c2719ac",
157
+ "filename": "WorldCereal_2021_tc-maize-second_maize_confidence.zip",
158
+ "filesize": 3752378387.0,
159
+ "checksum": "8a819762b7f3950839b0e832cb346e30",
160
+ "links": {
161
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/d3a0df02-8034-463f-a923-2bfe0c2719ac",
162
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-maize-second_maize_confidence.zip/content",
163
+ },
164
+ },
165
+ {
166
+ "id": "7a257437-89fe-4278-94fe-90a66e81e1bd",
167
+ "filename": "WorldCereal_2021_tc-springcereals_springcereals_classification.zip",
168
+ "filesize": 7008931281.0,
169
+ "checksum": "bb6e1124938e3a68b6e47d156f17bf86",
170
+ "links": {
171
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/7a257437-89fe-4278-94fe-90a66e81e1bd",
172
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-springcereals_springcereals_classification.zip/content",
173
+ },
174
+ },
175
+ {
176
+ "id": "a0b91677-f110-4df5-a5fd-7b1849895a02",
177
+ "filename": "WorldCereal_2021_tc-springcereals_springcereals_confidence.zip",
178
+ "filesize": 4708773375.0,
179
+ "checksum": "fd8dec8de691738df520c1ab451c7870",
180
+ "links": {
181
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/a0b91677-f110-4df5-a5fd-7b1849895a02",
182
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-springcereals_springcereals_confidence.zip/content",
183
+ },
184
+ },
185
+ {
186
+ "id": "a5774a05-ee8e-42df-bf06-68ebc6c14426",
187
+ "filename": "WorldCereal_2021_tc-wintercereals_activecropland_classification.zip",
188
+ "filesize": 20001277863.0,
189
+ "checksum": "3933653452a2e0b821c35091b6f4a035",
190
+ "links": {
191
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/a5774a05-ee8e-42df-bf06-68ebc6c14426",
192
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-wintercereals_activecropland_classification.zip/content",
193
+ },
194
+ },
195
+ {
196
+ "id": "5a4adaa6-e50a-469a-b401-6ccca02de443",
197
+ "filename": "WorldCereal_2021_tc-wintercereals_irrigation_classification.zip",
198
+ "filesize": 18019534510.0,
199
+ "checksum": "5032b11cf380d8cef07767e86ef4ee54",
200
+ "links": {
201
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/5a4adaa6-e50a-469a-b401-6ccca02de443",
202
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-wintercereals_irrigation_classification.zip/content",
203
+ },
204
+ },
205
+ {
206
+ "id": "23301576-64d2-48a1-9b19-0c126158c24d",
207
+ "filename": "WorldCereal_2021_tc-wintercereals_irrigation_confidence.zip",
208
+ "filesize": 11447731232.0,
209
+ "checksum": "f84c4088ac42bb67f308be50159ca778",
210
+ "links": {
211
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/23301576-64d2-48a1-9b19-0c126158c24d",
212
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-wintercereals_irrigation_confidence.zip/content",
213
+ },
214
+ },
215
+ {
216
+ "id": "9ab67c40-9072-44dc-8f6b-892fcaa3c079",
217
+ "filename": "WorldCereal_2021_tc-wintercereals_wintercereals_classification.zip",
218
+ "filesize": 18523882137.0,
219
+ "checksum": "386ce3fca8ba5577e2b62d6f3ea45b27",
220
+ "links": {
221
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/9ab67c40-9072-44dc-8f6b-892fcaa3c079",
222
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-wintercereals_wintercereals_classification.zip/content",
223
+ },
224
+ },
225
+ {
226
+ "id": "b4ce9cc1-a745-450a-b2e9-c4fb08059a93",
227
+ "filename": "WorldCereal_2021_tc-wintercereals_wintercereals_confidence.zip",
228
+ "filesize": 10174751452.0,
229
+ "checksum": "5870da83aaa4b3761cad3750feb73e43",
230
+ "links": {
231
+ "self": "https://zenodo.org/api/deposit/depositions/7875105/files/b4ce9cc1-a745-450a-b2e9-c4fb08059a93",
232
+ "download": "https://zenodo.org/api/records/7875105/files/WorldCereal_2021_tc-wintercereals_wintercereals_confidence.zip/content",
233
+ },
234
+ },
235
+ ]
236
+
237
def __init__(
    self,
    worldcereal_dir: str,
    band: str | None = None,
    context: DataSourceContext = DataSourceContext(),
) -> None:
    """Create a new WorldCereal.

    Downloads and extracts the archive for the selected band, then registers one
    raster item per AEZ GeoTIFF found in the extracted directory.

    Args:
        worldcereal_dir: the directory to extract the WorldCereal GeoTIFF files. For
            high performance, this should be a local directory; if the dataset is
            remote, prefix with a protocol ("file://") to use a local directory
            instead of a path relative to the dataset path.
        band: the worldcereal band to process. This will only be used if the layer
            config is missing from the context.
        context: the data source context.

    Raises:
        ValueError: if the layer config does not define exactly one band set with
            exactly one band, if neither a layer config nor band is given, or if
            no AEZ files are found for the band.
    """
    # Resolve the storage directory, relative to the dataset path when known.
    dataset_root = context.ds_path
    if dataset_root is None:
        worldcereal_upath = UPath(worldcereal_dir)
    else:
        worldcereal_upath = join_upath(dataset_root, worldcereal_dir)

    # The layer config, when present, takes precedence over the band argument.
    layer_config = context.layer_config
    if layer_config is not None:
        if len(layer_config.band_sets) != 1:
            raise ValueError("expected a single band set")
        if len(layer_config.band_sets[0].bands) != 1:
            raise ValueError("expected band set to have a single band")
        self.band = layer_config.band_sets[0].bands[0]
    elif band is not None:
        self.band = band
    else:
        raise ValueError("band must be set if layer config is not in the context")

    tif_dir, tif_filepath = self.download_worldcereal_data(
        self.band, worldcereal_upath
    )
    all_aezs: set[int] = self.all_aezs_from_tifs(tif_filepath)

    # Build one item spec per AEZ, attaching the band's GeoTIFF when it exists.
    item_specs: list[RasterItemSpec] = []
    for aez in all_aezs:
        # The name must be a str since it is later joined onto a posix path.
        spec = RasterItemSpec(fnames=[], bands=[], name=str(aez))
        aez_tif = self.filepath_for_product_aez(tif_filepath, aez)
        if aez_tif is not None:
            spec.fnames.append(aez_tif.absolute().as_uri())
            assert spec.bands is not None
            spec.bands.append([self.band])
        item_specs.append(spec)

    if not item_specs:
        raise ValueError(f"No AEZ files found for {self.band}")

    super().__init__(
        src_dir=tif_dir,
        raster_item_specs=item_specs,
        layer_type=LayerType.RASTER,
        context=context,
    )
299
+
300
@staticmethod
def band_from_zipfilename(filename: str) -> str:
    """Return the band name encoded in a WorldCereal zip filename.

    The filename (minus its last four characters, i.e. ".zip") must consist of
    exactly five "_"-separated fields; the last three (season, product and
    confidence/classification) form the band name.

    Raises:
        ValueError: if the filename does not split into exactly five fields.
    """
    stem = filename[:-4]  # strip the ".zip" extension
    _, _, season, product, kind = stem.split("_")
    # Band names must not contain '_', so the fields are joined with '-'.
    return f"{season}-{product}-{kind}"
307
+
308
@staticmethod
def zip_filepath_from_filename(filename: str) -> str:
    """Return the directory, relative to the extraction root, holding the tifs.

    Confidence products are placed under "aez_downsampled" while every other
    product is placed under "aez".

    Raises:
        ValueError: if the filename does not split into exactly five fields.
    """
    # [:-4] removes ".zip" before the name is split into its fields.
    fields = filename[:-4].split("_")
    _, _, season, product, confidence_or_classification = fields
    aez_name = (
        "aez_downsampled" if confidence_or_classification == "confidence" else "aez"
    )
    return "/".join(
        [
            "data/worldcereal_data/MAP-v3/2021",
            season,
            product,
            aez_name,
            confidence_or_classification,
        ]
    )
320
+
321
@staticmethod
def all_aezs_from_tifs(filepath: UPath) -> set[int]:
    """Collect the AEZ ids of every "*.tif" file directly inside filepath.

    The AEZ id is the integer before the first "_" in each tif's filename.

    Raises:
        ValueError: if a tif filename does not start with an integer field.
    """
    return {int(tif.name.split("_")[0]) for tif in filepath.glob("*.tif")}
329
+
330
+ @staticmethod
331
+ def filepath_for_product_aez(path_to_tifs: UPath, aez: int) -> UPath | None:
332
+ """Given a path for the tifs for a band and an aez, return the tif file if it exists."""
333
+ aez_file = list(path_to_tifs.glob(f"{aez}_*.tif"))
334
+ if len(aez_file) == 0:
335
+ return None
336
+ elif len(aez_file) == 1:
337
+ return aez_file[0]
338
+ raise ValueError(f"Got more than one tif for {aez} in {path_to_tifs}")
339
+
340
@classmethod
def download_worldcereal_data(
    cls, band: str, worldcereal_dir: UPath
) -> tuple[UPath, UPath]:
    """Download and extract the WorldCereal data for a single band.

    If the data was previously downloaded and extracted, this function returns
    quickly.

    Args:
        band: the worldcereal band to download.
        worldcereal_dir: the directory to download to.

    Returns:
        a (tif_dir, tif_filepath) tuple. tif_dir is the root directory the
        archive is extracted into. The archive is nested (the tifs live under
        "data/worldcereal_data/MAP-v3/2021/..."), so tif_filepath points to the
        specific sub-directory of tif_dir containing the tifs for this band.

    Raises:
        ValueError: if the band does not match exactly one entry of
            ZENODO_FILES_DATA, or the matching file is not in ZIP_FILENAMES.
        requests.HTTPError: if the download request returns an error status.
    """
    # Download the zip file (if it doesn't already exist).
    zip_dir = worldcereal_dir / "zips"
    zip_dir.mkdir(parents=True, exist_ok=True)
    logger.debug("Worldcereal zipfile: %s", zip_dir)

    # Look up the Zenodo file entry whose filename maps to the requested band.
    files_to_download = [
        f
        for f in cls.ZENODO_FILES_DATA
        if cls.band_from_zipfilename(f["filename"]) == band
    ]
    if len(files_to_download) != 1:
        raise ValueError(
            f"Got != 1 suitable filenames for {band}: {[f['filename'] for f in files_to_download]}"
        )
    file_to_download = files_to_download[0]
    filename: str = file_to_download["filename"]
    if filename not in cls.ZIP_FILENAMES:
        raise ValueError(f"Unsupported filename {filename} for band {band}")
    file_url = file_to_download["links"]["download"]

    zip_filepath = zip_dir / filename
    if not zip_filepath.exists():
        # Stream to an atomically-written file so a partial download is never
        # mistaken for a complete one. An interrupted download restarts from
        # scratch on the next call (there is no resume).
        logger.debug("Downloading %s to %s", file_url, zip_filepath)
        with requests.get(file_url, stream=True, timeout=cls.TIMEOUT_SECONDS) as r:
            r.raise_for_status()
            with open_atomic(zip_filepath, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

    # Extract the zip file.
    # We use a .extraction_complete file to indicate that the extraction is done.
    tif_dir = worldcereal_dir / "tifs"
    tif_dir.mkdir(parents=True, exist_ok=True)

    completed_fname = zip_dir / (filename + ".extraction_complete")
    if completed_fname.exists():
        logger.debug("%s has already been extracted", filename)
    else:
        logger.debug("extracting %s to %s", filename, tif_dir)

        if isinstance(tif_dir.fs, LocalFileSystem):
            # Local destination: extract straight into it.
            with get_upath_local(zip_filepath) as local_fname:
                with zipfile.ZipFile(local_fname) as zip_f:
                    zip_f.extractall(tif_dir.path)
        else:
            # Remote destination: extract to a temporary local directory first
            # and then copy it over. The context manager guarantees the
            # temporary directory is removed afterwards.
            with tempfile.TemporaryDirectory() as tmp_dir:
                with get_upath_local(zip_filepath) as local_fname:
                    with zipfile.ZipFile(local_fname) as zip_f:
                        zip_f.extractall(tmp_dir)

                # The archive is nested, so walk it recursively and preserve
                # the relative directory layout at the destination.
                for root, _dirs, members in os.walk(tmp_dir):
                    for member in members:
                        src_path = os.path.join(root, member)
                        rel_parts = os.path.relpath(src_path, tmp_dir).split(os.sep)
                        dst_path = tif_dir / "/".join(rel_parts)
                        dst_path.parent.mkdir(parents=True, exist_ok=True)
                        with open(src_path, "rb") as src:
                            with dst_path.open("wb") as dst:
                                shutil.copyfileobj(src, dst)

        # Mark the extraction complete.
        completed_fname.touch()

    tif_filepath = tif_dir / cls.zip_filepath_from_filename(filename)
    return tif_dir, tif_filepath
426
+
427
def list_items(self) -> list[Item]:
    """Lists items from the source directory while maintaining a cache file.

    This is identical to LocalFiles.list_items except that a unique summary
    is made per band (since we treat each band separately now.)

    The result is memoized on the instance rather than with functools.cache:
    a functools cache on a method is keyed on self and would keep every
    instance alive for the lifetime of the process.

    Returns:
        the items for this band, read from "<band>_summary.json" in the source
        directory when that file exists, otherwise listed by the importer (and
        written to that file for next time).
    """
    memo = getattr(self, "_worldcereal_items", None)
    if memo is not None:
        return memo

    cache_fname = self.src_dir / f"{self.band}_summary.json"
    if not cache_fname.exists():
        logger.debug("cache at %s does not exist, listing items", cache_fname)
        items = self.importer.list_items(self.src_dir)
        serialized_items = [item.serialize() for item in items]
        with cache_fname.open("w") as f:
            json.dump(serialized_items, f)
    else:
        logger.debug("loading item list from cache at %s", cache_fname)
        with cache_fname.open() as f:
            serialized_items = json.load(f)
        items = [
            self.deserialize_item(serialized_item)
            for serialized_item in serialized_items
        ]

    # Remember the result so repeated calls return the same list object.
    self._worldcereal_items = items
    return items
@@ -0,0 +1,144 @@
1
+ """Data source for ESA WorldCover 2021."""
2
+
3
+ import os
4
+ import shutil
5
+ import tempfile
6
+ import zipfile
7
+
8
+ import requests
9
+ from fsspec.implementations.local import LocalFileSystem
10
+ from upath import UPath
11
+
12
+ from rslearn.config import LayerType
13
+ from rslearn.data_sources import DataSourceContext
14
+ from rslearn.data_sources.local_files import LocalFiles
15
+ from rslearn.log_utils import get_logger
16
+ from rslearn.utils.fsspec import get_upath_local, join_upath, open_atomic
17
+
18
+ logger = get_logger(__name__)
19
+
20
+
21
+ class WorldCover(LocalFiles):
22
+ """A data source for the ESA WorldCover 2021 land cover map.
23
+
24
+ For details about the land cover map, see https://worldcover2021.esa.int/.
25
+
26
+ This data source downloads the 18 zip files that contain the map. They are then
27
+ extracted, yielding 2,651 GeoTIFF files. These are then used with
28
+ rslearn.data_sources.local_files.LocalFiles to implement the data source.
29
+ """
30
+
31
+ BASE_URL = "https://worldcover2021.esa.int/data/archive/"
32
+ ZIP_FILENAMES = [
33
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_N30E000.zip",
34
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_N30E060.zip",
35
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_N30E120.zip",
36
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_N30W060.zip",
37
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_N30W120.zip",
38
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_N30W180.zip",
39
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S30E000.zip",
40
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S30E060.zip",
41
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S30E120.zip",
42
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S30W060.zip",
43
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S30W120.zip",
44
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S30W180.zip",
45
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S90E000.zip",
46
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S90E060.zip",
47
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S90E120.zip",
48
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S90W060.zip",
49
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S90W120.zip",
50
+ "ESA_WorldCover_10m_2021_v200_60deg_macrotile_S90W180.zip",
51
+ ]
52
+ TIMEOUT_SECONDS = 10
53
+
54
def __init__(
    self,
    worldcover_dir: str,
    context: DataSourceContext = DataSourceContext(),
) -> None:
    """Create a new WorldCover.

    The layer configuration should specify a single band called B1 which will
    contain the land cover class.

    Args:
        worldcover_dir: the directory to extract the WorldCover GeoTIFF files. For
            high performance, this should be a local directory; if the dataset is
            remote, prefix with a protocol ("file://") to use a local directory
            instead of a path relative to the dataset path.
        context: the data source context.
    """
    # Resolve the storage directory, relative to the dataset path when known.
    dataset_root = context.ds_path
    worldcover_upath = (
        UPath(worldcover_dir)
        if dataset_root is None
        else join_upath(dataset_root, worldcover_dir)
    )

    # Make sure the GeoTIFFs exist before LocalFiles indexes the directory.
    tif_dir = self.download_worldcover_data(worldcover_upath)

    super().__init__(
        src_dir=tif_dir,
        layer_type=LayerType.RASTER,
        context=context,
    )
82
+
83
def download_worldcover_data(self, worldcover_dir: UPath) -> UPath:
    """Download and extract the WorldCover data.

    If the data was previously downloaded, this function returns quickly.

    Args:
        worldcover_dir: the directory to download to.

    Returns:
        the sub-directory containing GeoTIFFs

    Raises:
        requests.HTTPError: if a download request returns an error status.
    """
    # Download the zip files (if they don't already exist).
    zip_dir = worldcover_dir / "zips"
    zip_dir.mkdir(parents=True, exist_ok=True)
    for zip_name in self.ZIP_FILENAMES:
        src_url = self.BASE_URL + zip_name
        dst_fname = zip_dir / zip_name
        if dst_fname.exists():
            logger.debug("%s has already been downloaded at %s", zip_name, dst_fname)
            continue
        logger.info("downloading %s to %s", src_url, dst_fname)
        with requests.get(src_url, stream=True, timeout=self.TIMEOUT_SECONDS) as r:
            r.raise_for_status()
            # Atomic write so a partial download is never treated as complete.
            with open_atomic(dst_fname, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

    # Extract the zip files.
    # We use a .extraction_complete file to indicate that the extraction is done.
    tif_dir = worldcover_dir / "tifs"
    tif_dir.mkdir(parents=True, exist_ok=True)
    tif_dir_is_local = isinstance(tif_dir.fs, LocalFileSystem)
    for zip_name in self.ZIP_FILENAMES:
        zip_fname = zip_dir / zip_name
        completed_fname = zip_dir / (zip_name + ".extraction_complete")
        if completed_fname.exists():
            logger.debug("%s has already been extracted", zip_name)
            continue
        logger.info("extracting %s to %s", zip_name, tif_dir)

        if tif_dir_is_local:
            # Local destination: extract straight into it.
            with get_upath_local(zip_fname) as local_fname:
                with zipfile.ZipFile(local_fname) as zip_f:
                    zip_f.extractall(tif_dir.path)
        else:
            # Remote destination: extract to a temporary local directory first
            # and then copy it over. A fresh directory per archive, removed by
            # the context manager, avoids re-copying earlier archives' files.
            with tempfile.TemporaryDirectory() as tmp_dir:
                with get_upath_local(zip_fname) as local_fname:
                    with zipfile.ZipFile(local_fname) as zip_f:
                        zip_f.extractall(tmp_dir)

                # Walk recursively so archives with sub-directories are
                # copied with their relative layout preserved.
                for root, _dirs, members in os.walk(tmp_dir):
                    for member in members:
                        src_path = os.path.join(root, member)
                        rel_parts = os.path.relpath(src_path, tmp_dir).split(os.sep)
                        dst_path = tif_dir / "/".join(rel_parts)
                        dst_path.parent.mkdir(parents=True, exist_ok=True)
                        with open(src_path, "rb") as src:
                            with dst_path.open("wb") as dst:
                                shutil.copyfileobj(src, dst)

        # Mark the extraction complete.
        completed_fname.touch()

    return tif_dir