rslearn 0.0.1__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. {rslearn-0.0.1/rslearn.egg-info → rslearn-0.0.3}/PKG-INFO +365 -283
  2. rslearn-0.0.3/README.md +508 -0
  3. rslearn-0.0.3/pyproject.toml +106 -0
  4. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/config/__init__.py +2 -2
  5. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/config/dataset.py +164 -98
  6. rslearn-0.0.3/rslearn/const.py +17 -0
  7. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/data_sources/__init__.py +8 -0
  8. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/data_sources/aws_landsat.py +235 -80
  9. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/data_sources/aws_open_data.py +103 -118
  10. rslearn-0.0.3/rslearn/data_sources/aws_sentinel1.py +142 -0
  11. rslearn-0.0.3/rslearn/data_sources/climate_data_store.py +303 -0
  12. rslearn-0.0.3/rslearn/data_sources/copernicus.py +973 -0
  13. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/data_sources/data_source.py +17 -12
  14. rslearn-0.0.3/rslearn/data_sources/earthdaily.py +489 -0
  15. rslearn-0.0.3/rslearn/data_sources/earthdata_srtm.py +300 -0
  16. rslearn-0.0.3/rslearn/data_sources/gcp_public_data.py +882 -0
  17. rslearn-0.0.3/rslearn/data_sources/geotiff.py +1 -0
  18. rslearn-0.0.3/rslearn/data_sources/google_earth_engine.py +637 -0
  19. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/data_sources/local_files.py +153 -103
  20. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/data_sources/openstreetmap.py +33 -39
  21. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/data_sources/planet.py +17 -35
  22. rslearn-0.0.3/rslearn/data_sources/planet_basemap.py +296 -0
  23. rslearn-0.0.3/rslearn/data_sources/planetary_computer.py +764 -0
  24. rslearn-0.0.3/rslearn/data_sources/raster_source.py +23 -0
  25. rslearn-0.0.3/rslearn/data_sources/usda_cdl.py +206 -0
  26. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/data_sources/usgs_landsat.py +130 -73
  27. rslearn-0.0.3/rslearn/data_sources/utils.py +319 -0
  28. rslearn-0.0.3/rslearn/data_sources/vector_source.py +1 -0
  29. rslearn-0.0.3/rslearn/data_sources/worldcereal.py +456 -0
  30. rslearn-0.0.3/rslearn/data_sources/worldcover.py +142 -0
  31. rslearn-0.0.3/rslearn/data_sources/worldpop.py +156 -0
  32. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/data_sources/xyz_tiles.py +141 -79
  33. rslearn-0.0.3/rslearn/dataset/__init__.py +12 -0
  34. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/dataset/add_windows.py +1 -1
  35. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/dataset/dataset.py +43 -7
  36. rslearn-0.0.3/rslearn/dataset/index.py +173 -0
  37. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/dataset/manage.py +137 -49
  38. rslearn-0.0.3/rslearn/dataset/materialize.py +591 -0
  39. rslearn-0.0.3/rslearn/dataset/window.py +379 -0
  40. rslearn-0.0.3/rslearn/log_utils.py +24 -0
  41. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/main.py +351 -130
  42. rslearn-0.0.3/rslearn/models/clip.py +62 -0
  43. rslearn-0.0.3/rslearn/models/conv.py +56 -0
  44. rslearn-0.0.3/rslearn/models/croma.py +270 -0
  45. rslearn-0.0.3/rslearn/models/detr/__init__.py +5 -0
  46. rslearn-0.0.3/rslearn/models/detr/box_ops.py +103 -0
  47. rslearn-0.0.3/rslearn/models/detr/detr.py +493 -0
  48. rslearn-0.0.3/rslearn/models/detr/matcher.py +107 -0
  49. rslearn-0.0.3/rslearn/models/detr/position_encoding.py +114 -0
  50. rslearn-0.0.3/rslearn/models/detr/transformer.py +429 -0
  51. rslearn-0.0.3/rslearn/models/detr/util.py +24 -0
  52. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/models/faster_rcnn.py +10 -19
  53. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/models/fpn.py +1 -1
  54. rslearn-0.0.3/rslearn/models/module_wrapper.py +91 -0
  55. rslearn-0.0.3/rslearn/models/moe/distributed.py +262 -0
  56. rslearn-0.0.3/rslearn/models/moe/soft.py +676 -0
  57. rslearn-0.0.3/rslearn/models/molmo.py +65 -0
  58. rslearn-0.0.3/rslearn/models/multitask.py +392 -0
  59. rslearn-0.0.3/rslearn/models/pick_features.py +46 -0
  60. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/models/pooling_decoder.py +4 -2
  61. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/models/satlaspretrain.py +4 -7
  62. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/models/simple_time_series.py +75 -59
  63. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/models/singletask.py +8 -4
  64. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/models/ssl4eo_s12.py +10 -10
  65. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/models/swin.py +22 -21
  66. rslearn-0.0.3/rslearn/models/task_embedding.py +250 -0
  67. rslearn-0.0.3/rslearn/models/terramind.py +219 -0
  68. rslearn-0.0.3/rslearn/models/trunk.py +280 -0
  69. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/models/unet.py +21 -5
  70. rslearn-0.0.3/rslearn/models/upsample.py +35 -0
  71. rslearn-0.0.3/rslearn/models/use_croma.py +508 -0
  72. rslearn-0.0.3/rslearn/tile_stores/__init__.py +71 -0
  73. rslearn-0.0.3/rslearn/tile_stores/default.py +382 -0
  74. rslearn-0.0.3/rslearn/tile_stores/tile_store.py +328 -0
  75. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/train/callbacks/freeze_unfreeze.py +32 -20
  76. rslearn-0.0.3/rslearn/train/callbacks/gradients.py +109 -0
  77. rslearn-0.0.3/rslearn/train/callbacks/peft.py +116 -0
  78. rslearn-0.0.3/rslearn/train/data_module.py +562 -0
  79. rslearn-0.0.3/rslearn/train/dataset.py +1183 -0
  80. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/train/lightning_module.py +164 -54
  81. rslearn-0.0.3/rslearn/train/optimizer.py +31 -0
  82. rslearn-0.0.3/rslearn/train/prediction_writer.py +335 -0
  83. rslearn-0.0.3/rslearn/train/scheduler.py +62 -0
  84. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/train/tasks/classification.py +13 -12
  85. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/train/tasks/detection.py +101 -39
  86. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/train/tasks/multi_task.py +24 -9
  87. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/train/tasks/regression.py +113 -21
  88. rslearn-0.0.3/rslearn/train/tasks/segmentation.py +547 -0
  89. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/train/tasks/task.py +2 -2
  90. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/train/transforms/__init__.py +1 -1
  91. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/train/transforms/concatenate.py +9 -5
  92. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/train/transforms/crop.py +8 -4
  93. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/train/transforms/flip.py +5 -1
  94. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/train/transforms/normalize.py +34 -10
  95. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/train/transforms/pad.py +1 -1
  96. rslearn-0.0.3/rslearn/train/transforms/transform.py +131 -0
  97. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/utils/__init__.py +2 -6
  98. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/utils/array.py +2 -2
  99. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/utils/feature.py +2 -2
  100. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/utils/fsspec.py +70 -1
  101. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/utils/geometry.py +214 -7
  102. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/utils/get_utm_ups_crs.py +2 -3
  103. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/utils/grid_index.py +5 -5
  104. rslearn-0.0.3/rslearn/utils/jsonargparse.py +33 -0
  105. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/utils/mp.py +4 -3
  106. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/utils/raster_format.py +211 -96
  107. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/utils/rtree_index.py +64 -17
  108. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/utils/sqlite_index.py +7 -1
  109. rslearn-0.0.3/rslearn/utils/vector_format.py +411 -0
  110. {rslearn-0.0.1 → rslearn-0.0.3/rslearn.egg-info}/PKG-INFO +365 -283
  111. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn.egg-info/SOURCES.txt +38 -5
  112. rslearn-0.0.3/rslearn.egg-info/requires.txt +49 -0
  113. rslearn-0.0.1/README.md +0 -447
  114. rslearn-0.0.1/extra_requirements.txt +0 -11
  115. rslearn-0.0.1/pyproject.toml +0 -50
  116. rslearn-0.0.1/requirements.txt +0 -15
  117. rslearn-0.0.1/rslearn/const.py +0 -23
  118. rslearn-0.0.1/rslearn/data_sources/copernicus.py +0 -42
  119. rslearn-0.0.1/rslearn/data_sources/gcp_public_data.py +0 -529
  120. rslearn-0.0.1/rslearn/data_sources/google_earth_engine.py +0 -298
  121. rslearn-0.0.1/rslearn/data_sources/raster_source.py +0 -309
  122. rslearn-0.0.1/rslearn/data_sources/utils.py +0 -124
  123. rslearn-0.0.1/rslearn/data_sources/vector_source.py +0 -0
  124. rslearn-0.0.1/rslearn/dataset/__init__.py +0 -6
  125. rslearn-0.0.1/rslearn/dataset/materialize.py +0 -250
  126. rslearn-0.0.1/rslearn/dataset/window.py +0 -188
  127. rslearn-0.0.1/rslearn/models/multitask.py +0 -65
  128. rslearn-0.0.1/rslearn/models/pick_features.py +0 -33
  129. rslearn-0.0.1/rslearn/tile_stores/__init__.py +0 -37
  130. rslearn-0.0.1/rslearn/tile_stores/file.py +0 -242
  131. rslearn-0.0.1/rslearn/tile_stores/tile_store.py +0 -224
  132. rslearn-0.0.1/rslearn/train/data_module.py +0 -169
  133. rslearn-0.0.1/rslearn/train/dataset.py +0 -637
  134. rslearn-0.0.1/rslearn/train/prediction_writer.py +0 -178
  135. rslearn-0.0.1/rslearn/train/tasks/segmentation.py +0 -229
  136. rslearn-0.0.1/rslearn/train/transforms/transform.py +0 -129
  137. rslearn-0.0.1/rslearn/utils/mgrs.py +0 -24
  138. rslearn-0.0.1/rslearn/utils/utils.py +0 -22
  139. rslearn-0.0.1/rslearn/utils/vector_format.py +0 -253
  140. rslearn-0.0.1/rslearn.egg-info/requires.txt +0 -28
  141. {rslearn-0.0.1 → rslearn-0.0.3}/LICENSE +0 -0
  142. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/__init__.py +0 -0
  143. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/dataset/remap.py +0 -0
  144. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/models/__init__.py +0 -0
  145. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/models/registry.py +0 -0
  146. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/models/sam2_enc.py +0 -0
  147. /rslearn-0.0.1/rslearn/data_sources/geotiff.py → /rslearn-0.0.3/rslearn/py.typed +0 -0
  148. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/train/__init__.py +0 -0
  149. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/train/callbacks/__init__.py +0 -0
  150. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/train/tasks/__init__.py +0 -0
  151. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/utils/spatial_index.py +0 -0
  152. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn/utils/time.py +0 -0
  153. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn.egg-info/dependency_links.txt +0 -0
  154. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn.egg-info/entry_points.txt +0 -0
  155. {rslearn-0.0.1 → rslearn-0.0.3}/rslearn.egg-info/top_level.txt +0 -0
  156. {rslearn-0.0.1 → rslearn-0.0.3}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: rslearn
3
- Version: 0.0.1
3
+ Version: 0.0.3
4
4
  Summary: A library for developing remote sensing datasets and models
5
5
  Author-email: Favyen Bastani <favyenb@allenai.org>, Yawen Zhang <yawenz@allenai.org>, Patrick Beukema <patrickb@allenai.org>, Henry Herzog <henryh@allenai.org>, Piper Wolters <piperw@allenai.org>
6
6
  License: Apache License
@@ -205,36 +205,57 @@ License: Apache License
205
205
  See the License for the specific language governing permissions and
206
206
  limitations under the License.
207
207
 
208
- Requires-Python: >=3.10
208
+ Requires-Python: >=3.11
209
209
  Description-Content-Type: text/markdown
210
210
  License-File: LICENSE
211
- Requires-Dist: boto3
212
- Requires-Dist: class_registry
213
- Requires-Dist: python-dateutil
214
- Requires-Dist: pytimeparse
215
- Requires-Dist: fiona
216
- Requires-Dist: fsspec[gcs,s3]
217
- Requires-Dist: Pillow
218
- Requires-Dist: pyproj
219
- Requires-Dist: rasterio
220
- Requires-Dist: shapely
221
- Requires-Dist: tqdm
222
- Requires-Dist: torch
223
- Requires-Dist: torchvision
224
- Requires-Dist: universal_pathlib
225
- Requires-Dist: lightning[pytorch-extra]
211
+ Requires-Dist: boto3>=1.39
212
+ Requires-Dist: class_registry>=2.1
213
+ Requires-Dist: fiona>=1.10
214
+ Requires-Dist: fsspec==2025.3.0
215
+ Requires-Dist: jsonargparse>=4.35.0
216
+ Requires-Dist: lightning>=2.5.1.post0
217
+ Requires-Dist: Pillow>=11.3
218
+ Requires-Dist: pyproj>=3.7
219
+ Requires-Dist: python-dateutil>=2.9
220
+ Requires-Dist: pytimeparse>=1.1
221
+ Requires-Dist: rasterio>=1.4
222
+ Requires-Dist: shapely>=2.1
223
+ Requires-Dist: torch>=2.7.0
224
+ Requires-Dist: torchvision>=0.22.0
225
+ Requires-Dist: tqdm>=4.67
226
+ Requires-Dist: universal_pathlib>=0.2.6
226
227
  Provides-Extra: extra
227
- Requires-Dist: earthengine-api; extra == "extra"
228
- Requires-Dist: gcsfs; extra == "extra"
229
- Requires-Dist: google-cloud-storage; extra == "extra"
230
- Requires-Dist: mgrs; extra == "extra"
231
- Requires-Dist: osmium; extra == "extra"
232
- Requires-Dist: planet; extra == "extra"
233
- Requires-Dist: pycocotools; extra == "extra"
234
- Requires-Dist: rtree; extra == "extra"
235
- Requires-Dist: satlaspretrain_models; extra == "extra"
236
- Requires-Dist: scipy; extra == "extra"
237
- Requires-Dist: wandb; extra == "extra"
228
+ Requires-Dist: accelerate>=1.10; extra == "extra"
229
+ Requires-Dist: cdsapi>=0.7.6; extra == "extra"
230
+ Requires-Dist: earthdaily[platform]>=1.0.0; extra == "extra"
231
+ Requires-Dist: earthengine-api>=1.6.3; extra == "extra"
232
+ Requires-Dist: einops>=0.8; extra == "extra"
233
+ Requires-Dist: gcsfs==2025.3.0; extra == "extra"
234
+ Requires-Dist: google-cloud-bigquery>=3.35; extra == "extra"
235
+ Requires-Dist: google-cloud-storage>=2.18; extra == "extra"
236
+ Requires-Dist: netCDF4>=1.7.2; extra == "extra"
237
+ Requires-Dist: osmium>=4.0.2; extra == "extra"
238
+ Requires-Dist: planet>=3.1; extra == "extra"
239
+ Requires-Dist: planetary_computer>=1.0; extra == "extra"
240
+ Requires-Dist: pycocotools>=2.0; extra == "extra"
241
+ Requires-Dist: pystac_client>=0.9; extra == "extra"
242
+ Requires-Dist: rtree>=1.4; extra == "extra"
243
+ Requires-Dist: s3fs==2025.3.0; extra == "extra"
244
+ Requires-Dist: satlaspretrain_models>=0.3; extra == "extra"
245
+ Requires-Dist: scipy>=1.16; extra == "extra"
246
+ Requires-Dist: terratorch>=1.0.2; extra == "extra"
247
+ Requires-Dist: transformers>=4.55; extra == "extra"
248
+ Requires-Dist: wandb>=0.21; extra == "extra"
249
+ Provides-Extra: dev
250
+ Requires-Dist: interrogate>=1.7.0; extra == "dev"
251
+ Requires-Dist: mypy<2,>=1.17.1; extra == "dev"
252
+ Requires-Dist: pre-commit>=4.3.0; extra == "dev"
253
+ Requires-Dist: pytest>=8.0; extra == "dev"
254
+ Requires-Dist: pytest_httpserver; extra == "dev"
255
+ Requires-Dist: ruff>=0.12.9; extra == "dev"
256
+ Requires-Dist: pytest-dotenv; extra == "dev"
257
+ Requires-Dist: pytest-xdist; extra == "dev"
258
+ Dynamic: license-file
238
259
 
239
260
  Overview
240
261
  --------
@@ -254,10 +275,11 @@ rslearn helps with:
254
275
 
255
276
 
256
277
  Quick links:
257
- - [CoreConcepts](CoreConcepts.md) summarizes key concepts in rslearn, including
278
+ - [CoreConcepts](docs/CoreConcepts.md) summarizes key concepts in rslearn, including
258
279
  datasets, windows, layers, and data sources.
259
- - [Examples](Examples.md) contains more examples, including customizing different
280
+ - [Examples](docs/Examples.md) contains more examples, including customizing different
260
281
  stages of rslearn with additional code.
282
+ - [DatasetConfig](docs/DatasetConfig.md) documents the dataset configuration file.
261
283
 
262
284
 
263
285
  Setup
@@ -265,9 +287,33 @@ Setup
265
287
 
266
288
  rslearn requires Python 3.11+ (Python 3.12 is recommended).
267
289
 
268
- git clone https://github.com/allenai/rslearn.git
269
- cd rslearn
270
- pip install .[extra]
290
+ ```
291
+ git clone https://github.com/allenai/rslearn.git
292
+ cd rslearn
293
+ pip install .[extra]
294
+ ```
295
+
296
+
297
+ Supported Data Sources
298
+ ----------------------
299
+
300
+ rslearn supports ingesting raster and vector data from the following data sources. Even
301
+ if you don't plan to train models within rslearn, you can still use it to easily
302
+ download, crop, and re-project data based on spatiotemporal rectangles (windows) that
303
+ you define. See [Examples](docs/Examples.md) and [DatasetConfig](docs/DatasetConfig.md)
304
+ for how to setup these data sources.
305
+
306
+ - Sentinel-1
307
+ - Sentinel-2 L1C and L2A
308
+ - Landsat 8/9 OLI-TIRS
309
+ - National Agriculture Imagery Program
310
+ - OpenStreetMap
311
+ - Xyz (Slippy) Tiles (e.g., Mapbox tiles)
312
+ - Planet Labs (PlanetScope, SkySat)
313
+ - ESA WorldCover 2021
314
+
315
+ rslearn can also be used to easily mosaic, crop, and re-project any sets of local
316
+ raster and vector files you may have.
271
317
 
272
318
 
273
319
  Example Usage
@@ -281,28 +327,25 @@ Let's start by defining a region of interest and obtaining Sentinel-2 images. Cr
281
327
  directory `/path/to/dataset` and corresponding configuration file at
282
328
  `/path/to/dataset/config.json` as follows:
283
329
 
284
- {
285
- "layers": {
286
- "sentinel2": {
287
- "type": "raster",
288
- "band_sets": [{
289
- "dtype": "uint8",
290
- "bands": ["R", "G", "B"]
291
- }],
292
- "data_source": {
293
- "name": "rslearn.data_sources.gcp_public_data.Sentinel2",
294
- "index_cache_dir": "cache/sentinel2/",
295
- "max_time_delta": "1d",
296
- "sort_by": "cloud_cover",
297
- "use_rtree_index": false
298
- }
330
+ ```json
331
+ {
332
+ "layers": {
333
+ "sentinel2": {
334
+ "type": "raster",
335
+ "band_sets": [{
336
+ "dtype": "uint8",
337
+ "bands": ["R", "G", "B"]
338
+ }],
339
+ "data_source": {
340
+ "name": "rslearn.data_sources.gcp_public_data.Sentinel2",
341
+ "index_cache_dir": "cache/sentinel2/",
342
+ "sort_by": "cloud_cover",
343
+ "use_rtree_index": false
299
344
  }
300
- },
301
- "tile_store": {
302
- "name": "file",
303
- "root_dir": "tiles"
304
345
  }
305
346
  }
347
+ }
348
+ ```
306
349
 
307
350
  Here, we have initialized an empty dataset and defined a raster layer called
308
351
  `sentinel2`. Because it specifies a data source, it will be populated automatically. In
@@ -314,8 +357,10 @@ choosing the scenes with minimal cloud cover.
314
357
  Next, let's create our spatiotemporal windows. These will correspond to training
315
358
  examples.
316
359
 
317
- export DATASET_PATH=/path/to/dataset
318
- rslearn dataset add_windows --root $DATASET_PATH --group default --utm --resolution 10 --grid_size 128 --src_crs EPSG:4326 --box=-122.6901,47.2079,-121.4955,47.9403 --start 2024-06-01T00:00:00+00:00 --end 2024-08-01T00:00:00+00:00 --name seattle
360
+ ```
361
+ export DATASET_PATH=/path/to/dataset
362
+ rslearn dataset add_windows --root $DATASET_PATH --group default --utm --resolution 10 --grid_size 128 --src_crs EPSG:4326 --box=-122.6901,47.2079,-121.4955,47.9403 --start 2024-06-01T00:00:00+00:00 --end 2024-08-01T00:00:00+00:00 --name seattle
363
+ ```
319
364
 
320
365
  This creates windows along a 128x128 grid in the specified projection (i.e.,
321
366
  appropriate UTM zone for the location with 10 m/pixel resolution) covering the
@@ -327,9 +372,11 @@ We can now obtain the Sentinel-2 images by running prepare, ingest, and material
327
372
  * Ingest: retrieve those items. This step populates the `tiles` directory within the dataset.
328
373
  * Materialize: crop/mosaic the items to align with the windows. This populates the `layers` folder in each window directory.
329
374
 
330
- rslearn dataset prepare --root $DATASET_PATH --workers 32 --batch-size 8
331
- rslearn dataset ingest --root $DATASET_PATH --workers 32 --no-use-initial-job --jobs-per-process 1
332
- rslearn dataset materialize --root $DATASET_PATH --workers 32 --no-use-initial-job
375
+ ```
376
+ rslearn dataset prepare --root $DATASET_PATH --workers 32 --batch-size 8
377
+ rslearn dataset ingest --root $DATASET_PATH --workers 32 --no-use-initial-job --jobs-per-process 1
378
+ rslearn dataset materialize --root $DATASET_PATH --workers 32 --no-use-initial-job
379
+ ```
333
380
 
334
381
  For ingestion, you may need to reduce the number of workers depending on the available
335
382
  memory on your system.
@@ -337,32 +384,36 @@ memory on your system.
337
384
  You should now be able to open the GeoTIFF images. Let's find the window that
338
385
  corresponds to downtown Seattle:
339
386
 
340
- import shapely
341
- from rslearn.const import WGS84_PROJECTION
342
- from rslearn.dataset import Dataset
343
- from rslearn.utils import Projection, STGeometry
344
- from upath import UPath
345
-
346
- # Define longitude and latitude for downtown Seattle.
347
- downtown_seattle = shapely.Point(-122.333, 47.606)
348
-
349
- # Iterate over the windows and find the closest one.
350
- dataset = Dataset(path=UPath("/path/to/dataset"))
351
- best_window_name = None
352
- best_distance = None
353
- for window in dataset.load_windows(workers=32):
354
- shp = window.get_geometry().to_projection(WGS84_PROJECTION).shp
355
- distance = shp.distance(downtown_seattle)
356
- if best_distance is None or distance < best_distance:
357
- best_window_name = window.name
358
- best_distance = distance
359
-
360
- print(best_window_name)
387
+ ```python
388
+ import shapely
389
+ from rslearn.const import WGS84_PROJECTION
390
+ from rslearn.dataset import Dataset
391
+ from rslearn.utils import Projection, STGeometry
392
+ from upath import UPath
393
+
394
+ # Define longitude and latitude for downtown Seattle.
395
+ downtown_seattle = shapely.Point(-122.333, 47.606)
396
+
397
+ # Iterate over the windows and find the closest one.
398
+ dataset = Dataset(path=UPath("/path/to/dataset"))
399
+ best_window_name = None
400
+ best_distance = None
401
+ for window in dataset.load_windows(workers=32):
402
+ shp = window.get_geometry().to_projection(WGS84_PROJECTION).shp
403
+ distance = shp.distance(downtown_seattle)
404
+ if best_distance is None or distance < best_distance:
405
+ best_window_name = window.name
406
+ best_distance = distance
407
+
408
+ print(best_window_name)
409
+ ```
361
410
 
362
411
  It should be `seattle_54912_-527360`, so let's open it in qgis (or your favorite GIS
363
412
  software):
364
413
 
365
- qgis $DATASET_PATH/windows/default/seattle_54912_-527360/layers/sentinel2/R_G_B/geotiff.tif
414
+ ```
415
+ qgis $DATASET_PATH/windows/default/seattle_54912_-527360/layers/sentinel2/R_G_B/geotiff.tif
416
+ ```
366
417
 
367
418
 
368
419
  ### Adding Land Cover Labels
@@ -372,152 +423,164 @@ the ESA WorldCover land cover map as labels.
372
423
 
373
424
  Start by downloading the WorldCover data from https://worldcover2021.esa.int
374
425
 
375
- wget https://worldcover2021.esa.int/data/archive/ESA_WorldCover_10m_2021_v200_60deg_macrotile_N30W180.zip
376
- mkdir world_cover_tifs
377
- unzip ESA_WorldCover_10m_2021_v200_60deg_macrotile_N30W180.zip -d world_cover_tifs/
426
+ ```
427
+ wget https://worldcover2021.esa.int/data/archive/ESA_WorldCover_10m_2021_v200_60deg_macrotile_N30W180.zip
428
+ mkdir world_cover_tifs
429
+ unzip ESA_WorldCover_10m_2021_v200_60deg_macrotile_N30W180.zip -d world_cover_tifs/
430
+ ```
378
431
 
379
432
  It would require some work to write a script to re-project and crop these GeoTIFFs so
380
433
  that they align with the windows we have previously defined (and the Sentinel-2 images
381
434
  we have already ingested). We can use the LocalFiles data source to have rslearn
382
435
  automate this process. Update the dataset `config.json` with a new layer:
383
436
 
384
- "layers": {
385
- "sentinel2": {
386
- ...
387
- },
388
- "worldcover": {
389
- "type": "raster",
390
- "band_sets": [{
391
- "dtype": "uint8",
392
- "bands": ["B1"]
393
- }],
394
- "resampling_method": "nearest",
395
- "data_source": {
396
- "name": "rslearn.data_sources.local_files.LocalFiles",
397
- "src_dir": "file:///path/to/world_cover_tifs/"
398
- }
399
- }
437
+ ```json
438
+ "layers": {
439
+ "sentinel2": {
440
+ ...
400
441
  },
401
- ...
442
+ "worldcover": {
443
+ "type": "raster",
444
+ "band_sets": [{
445
+ "dtype": "uint8",
446
+ "bands": ["B1"]
447
+ }],
448
+ "resampling_method": "nearest",
449
+ "data_source": {
450
+ "name": "rslearn.data_sources.local_files.LocalFiles",
451
+ "src_dir": "file:///path/to/world_cover_tifs/"
452
+ }
453
+ }
454
+ },
455
+ ...
456
+ ```
402
457
 
403
458
  Repeat the materialize process so we populate the data for this new layer:
404
459
 
405
- rslearn dataset prepare --root $DATASET_PATH --workers 32 --batch-size 8
406
- rslearn dataset ingest --root $DATASET_PATH --workers 32 --no-use-initial-job --jobs-per-process 1
407
- rslearn dataset materialize --root $DATASET_PATH --workers 32 --no-use-initial-job
460
+ ```
461
+ rslearn dataset prepare --root $DATASET_PATH --workers 32 --batch-size 8
462
+ rslearn dataset ingest --root $DATASET_PATH --workers 32 --no-use-initial-job --jobs-per-process 1
463
+ rslearn dataset materialize --root $DATASET_PATH --workers 32 --no-use-initial-job
464
+ ```
408
465
 
409
466
  We can visualize both the GeoTIFFs together in qgis:
410
467
 
411
- qgis $DATASET_PATH/windows/default/seattle_54912_-527360/layers/*/*/geotiff.tif
468
+ ```
469
+ qgis $DATASET_PATH/windows/default/seattle_54912_-527360/layers/*/*/geotiff.tif
470
+ ```
412
471
 
413
472
 
414
473
  ### Training a Model
415
474
 
416
475
  Create a model configuration file `land_cover_model.yaml`:
417
476
 
477
+ ```yaml
478
+ model:
479
+ class_path: rslearn.train.lightning_module.RslearnLightningModule
480
+ init_args:
481
+ # This part defines the model architecture.
482
+ # Essentially we apply the SatlasPretrain Sentinel-2 backbone with a UNet decoder
483
+ # that terminates at a segmentation prediction head.
484
+ # The backbone outputs four feature maps at different scales, and the UNet uses
485
+ # these to compute a feature map at the input scale.
486
+ # Finally the segmentation head applies per-pixel softmax to compute the land
487
+ # cover class.
418
488
  model:
419
- class_path: rslearn.train.lightning_module.RslearnLightningModule
489
+ class_path: rslearn.models.singletask.SingleTaskModel
420
490
  init_args:
421
- # This part defines the model architecture.
422
- # Essentially we apply the SatlasPretrain Sentinel-2 backbone with a UNet decoder
423
- # that terminates at a segmentation prediction head.
424
- # The backbone outputs four feature maps at different scales, and the UNet uses
425
- # these to compute a feature map at the input scale.
426
- # Finally the segmentation head applies per-pixel softmax to compute the land
427
- # cover class.
428
- model:
429
- class_path: rslearn.models.singletask.SingleTaskModel
430
- init_args:
431
- encoder:
432
- - class_path: rslearn.models.satlaspretrain.SatlasPretrain
433
- init_args:
434
- model_identifier: "Sentinel2_SwinB_SI_RGB"
435
- decoder:
436
- - class_path: rslearn.models.unet.UNetDecoder
437
- init_args:
438
- in_channels: [[4, 128], [8, 256], [16, 512], [32, 1024]]
439
- # We use 101 classes because the WorldCover classes are 10, 20, 30, 40
440
- # 50, 60, 70, 80, 90, 95, 100.
441
- # We could process the GeoTIFFs to collapse them to 0-10 (the 11 actual
442
- # classes) but the model will quickly learn that the intermediate
443
- # values are never used.
444
- out_channels: 101
445
- conv_layers_per_resolution: 2
446
- - class_path: rslearn.train.tasks.segmentation.SegmentationHead
447
- # Remaining parameters in RslearnLightningModule define different aspects of the
448
- # training process like initial learning rate.
449
- lr: 0.0001
450
- data:
451
- class_path: rslearn.train.data_module.RslearnDataModule
491
+ encoder:
492
+ - class_path: rslearn.models.satlaspretrain.SatlasPretrain
493
+ init_args:
494
+ model_identifier: "Sentinel2_SwinB_SI_RGB"
495
+ decoder:
496
+ - class_path: rslearn.models.unet.UNetDecoder
497
+ init_args:
498
+ in_channels: [[4, 128], [8, 256], [16, 512], [32, 1024]]
499
+ # We use 101 classes because the WorldCover classes are 10, 20, 30, 40
500
+ # 50, 60, 70, 80, 90, 95, 100.
501
+ # We could process the GeoTIFFs to collapse them to 0-10 (the 11 actual
502
+ # classes) but the model will quickly learn that the intermediate
503
+ # values are never used.
504
+ out_channels: 101
505
+ conv_layers_per_resolution: 2
506
+ - class_path: rslearn.train.tasks.segmentation.SegmentationHead
507
+ # Remaining parameters in RslearnLightningModule define different aspects of the
508
+ # training process like initial learning rate.
509
+ lr: 0.0001
510
+ data:
511
+ class_path: rslearn.train.data_module.RslearnDataModule
512
+ init_args:
513
+ # Replace this with the dataset path.
514
+ path: /path/to/dataset/
515
+ # This defines the layers that should be read for each window.
516
+ # The key ("image" / "targets") is what the data will be called in the model,
517
+ # while the layers option specifies which layers will be read.
518
+ inputs:
519
+ image:
520
+ data_type: "raster"
521
+ layers: ["sentinel2"]
522
+ bands: ["R", "G", "B"]
523
+ passthrough: true
524
+ targets:
525
+ data_type: "raster"
526
+ layers: ["worldcover"]
527
+ bands: ["B1"]
528
+ is_target: true
529
+ task:
530
+ # Train for semantic segmentation.
531
+ # The remap option is only used when visualizing outputs during testing.
532
+ class_path: rslearn.train.tasks.segmentation.SegmentationTask
452
533
  init_args:
453
- # Replace this with the dataset path.
454
- path: /path/to/dataset/
455
- # This defines the layers that should be read for each window.
456
- # The key ("image" / "targets") is what the data will be called in the model,
457
- # while the layers option specifies which layers will be read.
458
- inputs:
459
- image:
460
- data_type: "raster"
461
- layers: ["sentinel2"]
462
- bands: ["R", "G", "B"]
463
- passthrough: true
464
- targets:
465
- data_type: "raster"
466
- layers: ["worldcover"]
467
- bands: ["B1"]
468
- is_target: true
469
- task:
470
- # Train for semantic segmentation.
471
- # The remap option is only used when visualizing outputs during testing.
472
- class_path: rslearn.train.tasks.segmentation.SegmentationTask
534
+ num_classes: 101
535
+ remap_values: [[0, 1], [0, 255]]
536
+ batch_size: 8
537
+ num_workers: 32
538
+ # These define different options for different phases/splits, like training,
539
+ # validation, and testing.
540
+ # Here we use the same transform across splits except training where we add a
541
+ # flipping augmentation.
542
+ # For now we are using the same windows for training and validation.
543
+ default_config:
544
+ transforms:
545
+ - class_path: rslearn.train.transforms.normalize.Normalize
473
546
  init_args:
474
- num_classes: 101
475
- remap_values: [[0, 1], [0, 255]]
476
- batch_size: 8
477
- num_workers: 32
478
- # These define different options for different phases/splits, like training,
479
- # validation, and testing.
480
- # Here we use the same transform across splits except training where we add a
481
- # flipping augmentation.
482
- # For now we are using the same windows for training and validation.
483
- default_config:
484
- transforms:
485
- - class_path: rslearn.train.transforms.normalize.Normalize
486
- init_args:
487
- mean: 0
488
- std: 255
489
- train_config:
490
- transforms:
491
- - class_path: rslearn.train.transforms.normalize.Normalize
492
- init_args:
493
- mean: 0
494
- std: 255
495
- - class_path: rslearn.train.transforms.flip.Flip
496
- init_args:
497
- image_selectors: ["image", "target/classes", "target/valid"]
498
- groups: ["default"]
499
- val_config:
500
- groups: ["default"]
501
- test_config:
502
- groups: ["default"]
503
- predict_config:
504
- groups: ["predict"]
505
- load_all_patches: true
506
- skip_targets: true
507
- patch_size: 512
508
- trainer:
509
- max_epochs: 10
510
- callbacks:
511
- - class_path: lightning.pytorch.callbacks.ModelCheckpoint
547
+ mean: 0
548
+ std: 255
549
+ train_config:
550
+ transforms:
551
+ - class_path: rslearn.train.transforms.normalize.Normalize
552
+ init_args:
553
+ mean: 0
554
+ std: 255
555
+ - class_path: rslearn.train.transforms.flip.Flip
512
556
  init_args:
513
- save_top_k: 1
514
- save_last: true
515
- monitor: val_accuracy
516
- mode: max
557
+ image_selectors: ["image", "target/classes", "target/valid"]
558
+ groups: ["default"]
559
+ val_config:
560
+ groups: ["default"]
561
+ test_config:
562
+ groups: ["default"]
563
+ predict_config:
564
+ groups: ["predict"]
565
+ load_all_patches: true
566
+ skip_targets: true
567
+ patch_size: 512
568
+ trainer:
569
+ max_epochs: 10
570
+ callbacks:
571
+ - class_path: lightning.pytorch.callbacks.ModelCheckpoint
572
+ init_args:
573
+ save_top_k: 1
574
+ save_last: true
575
+ monitor: val_accuracy
576
+ mode: max
577
+ ```
517
578
 
518
579
  Now we can train the model:
519
580
 
520
- rslearn model fit --config land_cover_model.yaml
581
+ ```
582
+ rslearn model fit --config land_cover_model.yaml
583
+ ```
521
584
 
522
585
 
523
586
  ### Apply the Model
@@ -528,22 +591,26 @@ windows along a grid, we just create one big window. This is because we are just
528
591
  to run the prediction over the whole window rather than use different windows as
529
592
  different training examples.
530
593
 
531
- rslearn dataset add_windows --root $DATASET_PATH --group predict --utm --resolution 10 --src_crs EPSG:4326 --box=-122.712,45.477,-122.621,45.549 --start 2024-06-01T00:00:00+00:00 --end 2024-08-01T00:00:00+00:00 --name portland
532
- rslearn dataset prepare --root $DATASET_PATH --workers 32 --batch-size 8
533
- rslearn dataset ingest --root $DATASET_PATH --workers 32 --no-use-initial-job --jobs-per-process 1
534
- rslearn dataset materialize --root $DATASET_PATH --workers 32 --no-use-initial-job
594
+ ```
595
+ rslearn dataset add_windows --root $DATASET_PATH --group predict --utm --resolution 10 --src_crs EPSG:4326 --box=-122.712,45.477,-122.621,45.549 --start 2024-06-01T00:00:00+00:00 --end 2024-08-01T00:00:00+00:00 --name portland
596
+ rslearn dataset prepare --root $DATASET_PATH --workers 32 --batch-size 8
597
+ rslearn dataset ingest --root $DATASET_PATH --workers 32 --no-use-initial-job --jobs-per-process 1
598
+ rslearn dataset materialize --root $DATASET_PATH --workers 32 --no-use-initial-job
599
+ ```
535
600
 
536
601
  We also need to add an RslearnPredictionWriter to the trainer callbacks in the model
537
602
  configuration file, as it will handle writing the outputs from the model to a GeoTIFF.
538
603
 
539
- trainer:
540
- callbacks:
541
- - class_path: lightning.pytorch.callbacks.ModelCheckpoint
542
- ...
543
- - class_path: rslearn.train.prediction_writer.RslearnWriter
544
- init_args:
545
- path: /path/to/dataset/
546
- output_layer: output
604
+ ```yaml
605
+ trainer:
606
+ callbacks:
607
+ - class_path: lightning.pytorch.callbacks.ModelCheckpoint
608
+ ...
609
+ - class_path: rslearn.train.prediction_writer.RslearnWriter
610
+ init_args:
611
+ path: /path/to/dataset/
612
+ output_layer: output
613
+ ```
547
614
 
548
615
  Because of our `predict_config`, when we run `model predict` it will apply the model on
549
616
  windows in the "predict" group, which is where we added the Portland window.
@@ -551,39 +618,46 @@ windows in the "predict" group, which is where we added the Portland window.
551
618
  And it will be written in a new output_layer called "output". But we have to update the
552
619
  dataset configuration so it specifies the layer:
553
620
 
554
-
555
- "layers": {
556
- "sentinel2": {
557
- ...
558
- },
559
- "worldcover": {
560
- ...
561
- },
562
- "output": {
563
- "type": "raster",
564
- "band_sets": [{
565
- "dtype": "uint8",
566
- "bands": ["output"]
567
- }]
568
- }
621
+ ```json
622
+ "layers": {
623
+ "sentinel2": {
624
+ ...
625
+ },
626
+ "worldcover": {
627
+ ...
569
628
  },
629
+ "output": {
630
+ "type": "raster",
631
+ "band_sets": [{
632
+ "dtype": "uint8",
633
+ "bands": ["output"]
634
+ }]
635
+ }
636
+ },
637
+ ```
570
638
 
571
639
  Now we can apply the model:
572
640
 
573
- # Find model checkpoint in lightning_logs dir.
574
- ls lightning_logs/*/checkpoints/last.ckpt
575
- rslearn model predict --config land_cover_model.yaml --ckpt_path lightning_logs/version_0/checkpoints/last.ckpt
641
+ ```
642
+ # Find model checkpoint in lightning_logs dir.
643
+ ls lightning_logs/*/checkpoints/last.ckpt
644
+ rslearn model predict --config land_cover_model.yaml --ckpt_path lightning_logs/version_0/checkpoints/last.ckpt
645
+ ```
576
646
 
577
647
  And visualize the Sentinel-2 image and output in qgis:
578
648
 
579
- qgis $DATASET_PATH/windows/predict/portland/layers/*/*/geotiff.tif
649
+ ```
650
+ qgis $DATASET_PATH/windows/predict/portland/layers/*/*/geotiff.tif
651
+ ```
580
652
 
581
653
 
582
654
  ### Defining Train and Validation Splits
583
655
 
584
656
  We can visualize the logged metrics using Tensorboard:
585
657
 
586
- tensorboard --logdir=lightning_logs/
658
+ ```
659
+ tensorboard --logdir=lightning_logs/
660
+ ```
587
661
 
588
662
  However, because our training and validation data are identical, the validation metrics
589
663
  are not meaningful.
@@ -597,57 +671,61 @@ We will use the second approach. The script below sets a "split" key in the opti
597
671
  dict (which is stored in each window's `metadata.json` file) to "train" or "val"
598
672
  based on the SHA-256 hash of the window name.
599
673
 
600
- import hashlib
601
- import tqdm
602
- from rslearn.dataset import Dataset, Window
603
- from upath import UPath
604
-
605
- ds_path = UPath("/path/to/dataset/")
606
- dataset = Dataset(ds_path)
607
- windows = dataset.load_windows(show_progress=True, workers=32)
608
- for window in tqdm.tqdm(windows):
609
- if hashlib.sha256(window.name.encode()).hexdigest()[0] in ["0", "1"]:
610
- split = "val"
611
- else:
612
- split = "train"
613
- if "split" in window.options and window.options["split"] == split:
614
- continue
615
- window.options["split"] = split
616
- window.save()
674
+ ```python
675
+ import hashlib
676
+ import tqdm
677
+ from rslearn.dataset import Dataset, Window
678
+ from upath import UPath
679
+
680
+ ds_path = UPath("/path/to/dataset/")
681
+ dataset = Dataset(ds_path)
682
+ windows = dataset.load_windows(show_progress=True, workers=32)
683
+ for window in tqdm.tqdm(windows):
684
+ if hashlib.sha256(window.name.encode()).hexdigest()[0] in ["0", "1"]:
685
+ split = "val"
686
+ else:
687
+ split = "train"
688
+ if "split" in window.options and window.options["split"] == split:
689
+ continue
690
+ window.options["split"] = split
691
+ window.save()
692
+ ```
617
693
 
618
694
  Now we can update the model configuration file to use these splits:
619
695
 
620
- default_config:
621
- transforms:
622
- - class_path: rslearn.train.transforms.normalize.Normalize
623
- init_args:
624
- mean: 0
625
- std: 255
626
- train_config:
627
- transforms:
628
- - class_path: rslearn.train.transforms.normalize.Normalize
629
- init_args:
630
- mean: 0
631
- std: 255
632
- - class_path: rslearn.train.transforms.flip.Flip
633
- init_args:
634
- image_selectors: ["image", "target/classes", "target/valid"]
635
- groups: ["default"]
636
- tags:
637
- split: train
638
- val_config:
639
- groups: ["default"]
640
- tags:
641
- split: val
642
- test_config:
643
- groups: ["default"]
644
- tags:
645
- split: val
646
- predict_config:
647
- groups: ["predict"]
648
- load_all_patches: true
649
- skip_targets: true
650
- patch_size: 512
696
+ ```yaml
697
+ default_config:
698
+ transforms:
699
+ - class_path: rslearn.train.transforms.normalize.Normalize
700
+ init_args:
701
+ mean: 0
702
+ std: 255
703
+ train_config:
704
+ transforms:
705
+ - class_path: rslearn.train.transforms.normalize.Normalize
706
+ init_args:
707
+ mean: 0
708
+ std: 255
709
+ - class_path: rslearn.train.transforms.flip.Flip
710
+ init_args:
711
+ image_selectors: ["image", "target/classes", "target/valid"]
712
+ groups: ["default"]
713
+ tags:
714
+ split: train
715
+ val_config:
716
+ groups: ["default"]
717
+ tags:
718
+ split: val
719
+ test_config:
720
+ groups: ["default"]
721
+ tags:
722
+ split: val
723
+ predict_config:
724
+ groups: ["predict"]
725
+ load_all_patches: true
726
+ skip_targets: true
727
+ patch_size: 512
728
+ ```
651
729
 
652
730
  The `tags` option that we are adding here tells rslearn to only load windows with a
653
731
  matching key and value in the window options.
@@ -655,13 +733,17 @@ matching key and value in the window options.
655
733
  Previously when we run `model fit`, it should show the same number of windows for
656
734
  training and validation:
657
735
 
658
- got 4752 examples in split train
659
- got 4752 examples in split val
736
+ ```
737
+ got 4752 examples in split train
738
+ got 4752 examples in split val
739
+ ```
660
740
 
661
741
  With the updates, it should show different numbers like this:
662
742
 
663
- got 4167 examples in split train
664
- got 585 examples in split val
743
+ ```
744
+ got 4167 examples in split train
745
+ got 585 examples in split val
746
+ ```
665
747
 
666
748
 
667
749
  ### Visualizing with `model test`