rslearn 0.0.7__tar.gz → 0.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. {rslearn-0.0.7/rslearn.egg-info → rslearn-0.0.9}/PKG-INFO +3 -4
  2. {rslearn-0.0.7 → rslearn-0.0.9}/README.md +1 -0
  3. {rslearn-0.0.7 → rslearn-0.0.9}/pyproject.toml +5 -5
  4. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/local_files.py +20 -3
  5. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/planetary_computer.py +79 -14
  6. rslearn-0.0.9/rslearn/dataset/handler_summaries.py +130 -0
  7. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/dataset/manage.py +159 -24
  8. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/dataset/materialize.py +21 -2
  9. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/dataset/remap.py +29 -4
  10. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/main.py +60 -8
  11. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/clay/clay.py +29 -14
  12. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm.py +37 -25
  13. rslearn-0.0.9/rslearn/models/dinov3.py +166 -0
  14. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/galileo/galileo.py +58 -12
  15. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/galileo/single_file_galileo.py +7 -1
  16. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/presto/presto.py +11 -0
  17. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/prithvi.py +139 -52
  18. rslearn-0.0.9/rslearn/models/registry.py +22 -0
  19. rslearn-0.0.9/rslearn/models/resize_features.py +45 -0
  20. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/simple_time_series.py +65 -10
  21. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/upsample.py +2 -2
  22. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/tile_stores/default.py +34 -7
  23. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/transforms/normalize.py +34 -5
  24. rslearn-0.0.9/rslearn/train/transforms/select_bands.py +67 -0
  25. rslearn-0.0.9/rslearn/train/transforms/sentinel1.py +60 -0
  26. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/transforms/transform.py +23 -6
  27. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/raster_format.py +44 -5
  28. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/vector_format.py +35 -4
  29. {rslearn-0.0.7 → rslearn-0.0.9/rslearn.egg-info}/PKG-INFO +3 -4
  30. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn.egg-info/SOURCES.txt +5 -0
  31. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn.egg-info/requires.txt +1 -3
  32. rslearn-0.0.7/rslearn/models/registry.py +0 -5
  33. {rslearn-0.0.7 → rslearn-0.0.9}/LICENSE +0 -0
  34. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/__init__.py +0 -0
  35. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/arg_parser.py +0 -0
  36. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/config/__init__.py +0 -0
  37. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/config/dataset.py +0 -0
  38. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/const.py +0 -0
  39. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/__init__.py +0 -0
  40. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/aws_landsat.py +0 -0
  41. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/aws_open_data.py +0 -0
  42. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/aws_sentinel1.py +0 -0
  43. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/climate_data_store.py +0 -0
  44. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/copernicus.py +0 -0
  45. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/data_source.py +0 -0
  46. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/earthdaily.py +0 -0
  47. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/earthdata_srtm.py +0 -0
  48. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/eurocrops.py +0 -0
  49. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/gcp_public_data.py +0 -0
  50. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/geotiff.py +0 -0
  51. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/google_earth_engine.py +0 -0
  52. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/openstreetmap.py +0 -0
  53. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/planet.py +0 -0
  54. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/planet_basemap.py +0 -0
  55. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/raster_source.py +0 -0
  56. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/usda_cdl.py +0 -0
  57. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/usgs_landsat.py +0 -0
  58. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/utils.py +0 -0
  59. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/vector_source.py +0 -0
  60. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/worldcereal.py +0 -0
  61. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/worldcover.py +0 -0
  62. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/worldpop.py +0 -0
  63. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/xyz_tiles.py +0 -0
  64. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/dataset/__init__.py +0 -0
  65. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/dataset/add_windows.py +0 -0
  66. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/dataset/dataset.py +0 -0
  67. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/dataset/index.py +0 -0
  68. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/dataset/window.py +0 -0
  69. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/log_utils.py +0 -0
  70. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/__init__.py +0 -0
  71. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/anysat.py +0 -0
  72. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/clay/configs/metadata.yaml +0 -0
  73. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/clip.py +0 -0
  74. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/conv.py +0 -0
  75. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm_src/__init__.py +0 -0
  76. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm_src/aurora/area.py +0 -0
  77. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm_src/aurora/fourier.py +0 -0
  78. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm_src/dynamic_hypernetwork.py +0 -0
  79. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm_src/flexivit/patch_embed.py +0 -0
  80. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm_src/flexivit/utils.py +0 -0
  81. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm_src/model_vit.py +0 -0
  82. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm_src/util/pos_embed.py +0 -0
  83. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/croma.py +0 -0
  84. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/detr/__init__.py +0 -0
  85. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/detr/box_ops.py +0 -0
  86. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/detr/detr.py +0 -0
  87. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/detr/matcher.py +0 -0
  88. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/detr/position_encoding.py +0 -0
  89. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/detr/transformer.py +0 -0
  90. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/detr/util.py +0 -0
  91. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/faster_rcnn.py +0 -0
  92. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/fpn.py +0 -0
  93. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/galileo/__init__.py +0 -0
  94. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/module_wrapper.py +0 -0
  95. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/molmo.py +0 -0
  96. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/multitask.py +0 -0
  97. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon.py +0 -0
  98. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/drone.yaml +0 -0
  99. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/enmap.yaml +0 -0
  100. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/goes.yaml +0 -0
  101. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/himawari.yaml +0 -0
  102. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/intuition.yaml +0 -0
  103. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/landsat8.yaml +0 -0
  104. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/modis_terra.yaml +0 -0
  105. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/qb2_ge1.yaml +0 -0
  106. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/sentinel1.yaml +0 -0
  107. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/sentinel2.yaml +0 -0
  108. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/superdove.yaml +0 -0
  109. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/wv23.yaml +0 -0
  110. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/pick_features.py +0 -0
  111. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/pooling_decoder.py +0 -0
  112. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/presto/__init__.py +0 -0
  113. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/presto/single_file_presto.py +0 -0
  114. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/sam2_enc.py +0 -0
  115. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/satlaspretrain.py +0 -0
  116. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/singletask.py +0 -0
  117. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/ssl4eo_s12.py +0 -0
  118. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/swin.py +0 -0
  119. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/task_embedding.py +0 -0
  120. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/terramind.py +0 -0
  121. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/trunk.py +0 -0
  122. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/unet.py +0 -0
  123. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/use_croma.py +0 -0
  124. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/py.typed +0 -0
  125. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/template_params.py +0 -0
  126. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/tile_stores/__init__.py +0 -0
  127. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/tile_stores/tile_store.py +0 -0
  128. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/__init__.py +0 -0
  129. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/callbacks/__init__.py +0 -0
  130. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/callbacks/adapters.py +0 -0
  131. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/callbacks/freeze_unfreeze.py +0 -0
  132. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/callbacks/gradients.py +0 -0
  133. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/callbacks/peft.py +0 -0
  134. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/data_module.py +0 -0
  135. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/dataset.py +0 -0
  136. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/lightning_module.py +0 -0
  137. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/optimizer.py +0 -0
  138. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/prediction_writer.py +0 -0
  139. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/scheduler.py +0 -0
  140. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/tasks/__init__.py +0 -0
  141. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/tasks/classification.py +0 -0
  142. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/tasks/detection.py +0 -0
  143. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/tasks/multi_task.py +0 -0
  144. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/tasks/per_pixel_regression.py +0 -0
  145. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/tasks/regression.py +0 -0
  146. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/tasks/segmentation.py +0 -0
  147. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/tasks/task.py +0 -0
  148. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/transforms/__init__.py +0 -0
  149. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/transforms/concatenate.py +0 -0
  150. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/transforms/crop.py +0 -0
  151. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/transforms/flip.py +0 -0
  152. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/transforms/mask.py +0 -0
  153. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/transforms/pad.py +0 -0
  154. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/__init__.py +0 -0
  155. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/array.py +0 -0
  156. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/feature.py +0 -0
  157. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/fsspec.py +0 -0
  158. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/geometry.py +0 -0
  159. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/get_utm_ups_crs.py +0 -0
  160. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/grid_index.py +0 -0
  161. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/jsonargparse.py +0 -0
  162. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/mp.py +0 -0
  163. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/rtree_index.py +0 -0
  164. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/spatial_index.py +0 -0
  165. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/sqlite_index.py +0 -0
  166. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/time.py +0 -0
  167. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn.egg-info/dependency_links.txt +0 -0
  168. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn.egg-info/entry_points.txt +0 -0
  169. {rslearn-0.0.7 → rslearn-0.0.9}/rslearn.egg-info/top_level.txt +0 -0
  170. {rslearn-0.0.7 → rslearn-0.0.9}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rslearn
3
- Version: 0.0.7
3
+ Version: 0.0.9
4
4
  Summary: A library for developing remote sensing datasets and models
5
5
  Author: OlmoEarth Team
6
6
  License: Apache License
@@ -212,7 +212,6 @@ Requires-Python: >=3.11
212
212
  Description-Content-Type: text/markdown
213
213
  License-File: LICENSE
214
214
  Requires-Dist: boto3>=1.39
215
- Requires-Dist: class_registry>=2.1
216
215
  Requires-Dist: fiona>=1.10
217
216
  Requires-Dist: fsspec>=2025.9.0
218
217
  Requires-Dist: jsonargparse>=4.35.0
@@ -233,7 +232,7 @@ Requires-Dist: cdsapi>=0.7.6; extra == "extra"
233
232
  Requires-Dist: earthdaily[platform]>=1.0.7; extra == "extra"
234
233
  Requires-Dist: earthengine-api>=1.6.3; extra == "extra"
235
234
  Requires-Dist: einops>=0.8; extra == "extra"
236
- Requires-Dist: gcsfs>=2025.9.0; extra == "extra"
235
+ Requires-Dist: fsspec[gcs,s3]; extra == "extra"
237
236
  Requires-Dist: google-cloud-bigquery>=3.35; extra == "extra"
238
237
  Requires-Dist: google-cloud-storage>=2.18; extra == "extra"
239
238
  Requires-Dist: huggingface_hub>=0.34.4; extra == "extra"
@@ -244,7 +243,6 @@ Requires-Dist: planetary_computer>=1.0; extra == "extra"
244
243
  Requires-Dist: pycocotools>=2.0; extra == "extra"
245
244
  Requires-Dist: pystac_client>=0.9; extra == "extra"
246
245
  Requires-Dist: rtree>=1.4; extra == "extra"
247
- Requires-Dist: s3fs>=2025.9.0; extra == "extra"
248
246
  Requires-Dist: satlaspretrain_models>=0.3; extra == "extra"
249
247
  Requires-Dist: scipy>=1.16; extra == "extra"
250
248
  Requires-Dist: terratorch>=1.0.2; extra == "extra"
@@ -285,6 +283,7 @@ Quick links:
285
283
  - [Examples](docs/Examples.md) contains more examples, including customizing different
286
284
  stages of rslearn with additional code.
287
285
  - [DatasetConfig](docs/DatasetConfig.md) documents the dataset configuration file.
286
+ - [ModelConfig](docs/ModelConfig.md) documents the model configuration file.
288
287
 
289
288
 
290
289
  Setup
@@ -21,6 +21,7 @@ Quick links:
21
21
  - [Examples](docs/Examples.md) contains more examples, including customizing different
22
22
  stages of rslearn with additional code.
23
23
  - [DatasetConfig](docs/DatasetConfig.md) documents the dataset configuration file.
24
+ - [ModelConfig](docs/ModelConfig.md) documents the model configuration file.
24
25
 
25
26
 
26
27
  Setup
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "rslearn"
3
- version = "0.0.7"
3
+ version = "0.0.9"
4
4
  description = "A library for developing remote sensing datasets and models"
5
5
  authors = [
6
6
  { name = "OlmoEarth Team" },
@@ -10,9 +10,8 @@ license = {file = "LICENSE"}
10
10
  requires-python = ">=3.11"
11
11
  dependencies = [
12
12
  "boto3>=1.39",
13
- "class_registry>=2.1",
14
13
  "fiona>=1.10",
15
- "fsspec>=2025.9.0",
14
+ "fsspec>=2025.9.0", # this is used both directly and indirectly (via universal_pathlib) in our code
16
15
  "jsonargparse>=4.35.0",
17
16
  "lightning>=2.5.1.post0",
18
17
  "Pillow>=11.3",
@@ -35,7 +34,9 @@ extra = [
35
34
  "earthdaily[platform]>=1.0.7",
36
35
  "earthengine-api>=1.6.3",
37
36
  "einops>=0.8",
38
- "gcsfs>=2025.9.0",
37
+ # https://github.com/fsspec/universal_pathlib?tab=readme-ov-file#adding-universal_pathlib-to-your-project
38
+ # https://github.com/fsspec/filesystem_spec?tab=readme-ov-file#install
39
+ "fsspec[gcs, s3]", # for both direct use via fsspec and indirect use via universal_pathlib, docs suggest enabling specific backends like this
39
40
  "google-cloud-bigquery>=3.35",
40
41
  "google-cloud-storage>=2.18",
41
42
  "huggingface_hub>=0.34.4",
@@ -46,7 +47,6 @@ extra = [
46
47
  "pycocotools>=2.0",
47
48
  "pystac_client>=0.9",
48
49
  "rtree>=1.4",
49
- "s3fs>=2025.9.0",
50
50
  "satlaspretrain_models>=0.3",
51
51
  "scipy>=1.16",
52
52
  "terratorch>=1.0.2",
@@ -2,12 +2,12 @@
2
2
 
3
3
  import functools
4
4
  import json
5
+ from collections.abc import Callable
5
6
  from typing import Any, Generic, TypeVar
6
7
 
7
8
  import fiona
8
9
  import shapely
9
10
  import shapely.geometry
10
- from class_registry import ClassRegistry
11
11
  from rasterio.crs import CRS
12
12
  from upath import UPath
13
13
 
@@ -23,7 +23,24 @@ from rslearn.utils.geometry import Projection, STGeometry, get_global_geometry
23
23
  from .data_source import DataSource, Item, QueryConfig
24
24
 
25
25
  logger = get_logger("__name__")
26
- Importers = ClassRegistry()
26
+ _ImporterT = TypeVar("_ImporterT", bound="Importer")
27
+
28
+
29
+ class _ImporterRegistry(dict[str, type["Importer"]]):
30
+ """Registry for Importer classes."""
31
+
32
+ def register(self, name: str) -> Callable[[type[_ImporterT]], type[_ImporterT]]:
33
+ """Decorator to register an importer class."""
34
+
35
+ def decorator(cls: type[_ImporterT]) -> type[_ImporterT]:
36
+ self[name] = cls
37
+ return cls
38
+
39
+ return decorator
40
+
41
+
42
+ Importers = _ImporterRegistry()
43
+
27
44
 
28
45
  ItemType = TypeVar("ItemType", bound=Item)
29
46
  LayerConfigType = TypeVar("LayerConfigType", bound=LayerConfig)
@@ -425,7 +442,7 @@ class LocalFiles(DataSource):
425
442
  """
426
443
  self.config = config
427
444
 
428
- self.importer = Importers[config.layer_type.value]
445
+ self.importer = Importers[config.layer_type.value]()
429
446
  self.src_dir = src_dir
430
447
 
431
448
  @staticmethod
@@ -83,6 +83,10 @@ class PlanetaryComputer(DataSource, TileStore):
83
83
 
84
84
  STAC_ENDPOINT = "https://planetarycomputer.microsoft.com/api/stac/v1"
85
85
 
86
+ # Default threshold for recreating the STAC client to prevent memory leaks
87
+ # from the pystac Catalog's resolved objects cache growing unbounded
88
+ DEFAULT_MAX_ITEMS_PER_CLIENT = 1000
89
+
86
90
  def __init__(
87
91
  self,
88
92
  collection_name: str,
@@ -93,6 +97,7 @@ class PlanetaryComputer(DataSource, TileStore):
93
97
  timeout: timedelta = timedelta(seconds=10),
94
98
  skip_items_missing_assets: bool = False,
95
99
  cache_dir: UPath | None = None,
100
+ max_items_per_client: int | None = None,
96
101
  ):
97
102
  """Initialize a new PlanetaryComputer instance.
98
103
 
@@ -109,6 +114,9 @@ class PlanetaryComputer(DataSource, TileStore):
109
114
  cache_dir: optional directory to cache items by name, including asset URLs.
110
115
  If not set, there will be no cache and instead STAC requests will be
111
116
  needed each time.
117
+ max_items_per_client: number of STAC items to process before recreating
118
+ the client to prevent memory leaks from the resolved objects cache.
119
+ If None or 0, DEFAULT_MAX_ITEMS_PER_CLIENT is used.
112
120
  """
113
121
  self.collection_name = collection_name
114
122
  self.asset_bands = asset_bands
@@ -118,12 +126,15 @@ class PlanetaryComputer(DataSource, TileStore):
118
126
  self.timeout = timeout
119
127
  self.skip_items_missing_assets = skip_items_missing_assets
120
128
  self.cache_dir = cache_dir
129
+ self.max_items_per_client = (
130
+ max_items_per_client or self.DEFAULT_MAX_ITEMS_PER_CLIENT
131
+ )
121
132
 
122
133
  if self.cache_dir is not None:
123
134
  self.cache_dir.mkdir(parents=True, exist_ok=True)
124
135
 
125
136
  self.client: pystac_client.Client | None = None
126
- self.collection: pystac_client.CollectionClient | None = None
137
+ self._client_item_count = 0
127
138
 
128
139
  @staticmethod
129
140
  def from_config(config: RasterLayerConfig, ds_path: UPath) -> "PlanetaryComputer":
@@ -142,7 +153,12 @@ class PlanetaryComputer(DataSource, TileStore):
142
153
  if "cache_dir" in d:
143
154
  kwargs["cache_dir"] = join_upath(ds_path, d["cache_dir"])
144
155
 
145
- simple_optionals = ["query", "sort_by", "sort_ascending"]
156
+ simple_optionals = [
157
+ "query",
158
+ "sort_by",
159
+ "sort_ascending",
160
+ "max_items_per_client",
161
+ ]
146
162
  for k in simple_optionals:
147
163
  if k in d:
148
164
  kwargs[k] = d[k]
@@ -151,20 +167,40 @@ class PlanetaryComputer(DataSource, TileStore):
151
167
 
152
168
  def _load_client(
153
169
  self,
154
- ) -> tuple[pystac_client.Client, pystac_client.CollectionClient]:
170
+ ) -> pystac_client.Client:
155
171
  """Lazily load pystac client.
156
172
 
157
173
  We don't load it when creating the data source because it takes time and caller
158
174
  may not be calling get_items. Additionally, loading it during the get_items
159
175
  call enables leveraging the retry loop functionality in
160
176
  prepare_dataset_windows.
161
- """
162
- if self.client is not None:
163
- return self.client, self.collection
164
177
 
178
+ Note: We periodically recreate the client to prevent memory leaks from the
179
+ pystac Catalog's resolved objects cache, which grows unbounded as STAC items
180
+ are deserialized and cached. The cache cannot be cleared or disabled.
181
+ """
182
+ if self.client is None:
183
+ logger.info("Creating initial STAC client")
184
+ self.client = pystac_client.Client.open(self.STAC_ENDPOINT)
185
+ return self.client
186
+
187
+ if self._client_item_count < self.max_items_per_client:
188
+ return self.client
189
+
190
+ # Recreate client to clear the resolved objects cache
191
+ current_client = self.client
192
+ logger.debug(
193
+ "Recreating STAC client after processing %d items (threshold: %d)",
194
+ self._client_item_count,
195
+ self.max_items_per_client,
196
+ )
197
+ client_root = current_client.get_root()
198
+ client_root.clear_links()
199
+ client_root.clear_items()
200
+ client_root.clear_children()
201
+ self._client_item_count = 0
165
202
  self.client = pystac_client.Client.open(self.STAC_ENDPOINT)
166
- self.collection = self.client.get_collection(self.collection_name)
167
- return self.client, self.collection
203
+ return self.client
168
204
 
169
205
  def _stac_item_to_item(self, stac_item: pystac.Item) -> PlanetaryComputerItem:
170
206
  shp = shapely.geometry.shape(stac_item.geometry)
@@ -210,10 +246,26 @@ class PlanetaryComputer(DataSource, TileStore):
210
246
 
211
247
  # No cache or not in cache, so we need to make the STAC request.
212
248
  logger.debug("Getting STAC item {name}")
213
- _, collection = self._load_client()
214
- stac_item = collection.get_item(name)
249
+ client = self._load_client()
250
+
251
+ search_result = client.search(ids=[name], collections=[self.collection_name])
252
+ stac_items = list(search_result.items())
253
+
254
+ if not stac_items:
255
+ raise ValueError(
256
+ f"Item {name} not found in collection {self.collection_name}"
257
+ )
258
+ if len(stac_items) > 1:
259
+ raise ValueError(
260
+ f"Multiple items found for ID {name} in collection {self.collection_name}"
261
+ )
262
+
263
+ stac_item = stac_items[0]
215
264
  item = self._stac_item_to_item(stac_item)
216
265
 
266
+ # Track items processed for client recreation threshold (after deserialization)
267
+ self._client_item_count += 1
268
+
217
269
  # Finally we cache it if cache_dir is set.
218
270
  if cache_fname is not None:
219
271
  with cache_fname.open("w") as f:
@@ -233,7 +285,7 @@ class PlanetaryComputer(DataSource, TileStore):
233
285
  Returns:
234
286
  List of groups of items that should be retrieved for each geometry.
235
287
  """
236
- client, _ = self._load_client()
288
+ client = self._load_client()
237
289
 
238
290
  groups = []
239
291
  for geometry in geometries:
@@ -247,7 +299,9 @@ class PlanetaryComputer(DataSource, TileStore):
247
299
  datetime=wgs84_geometry.time_range,
248
300
  query=self.query,
249
301
  )
250
- stac_items = [item for item in result.item_collection()]
302
+ stac_items = [item for item in result.items()]
303
+ # Track items processed for client recreation threshold (after deserialization)
304
+ self._client_item_count += len(stac_items)
251
305
  logger.debug("STAC search yielded %d items", len(stac_items))
252
306
 
253
307
  if self.skip_items_missing_assets:
@@ -580,7 +634,13 @@ class Sentinel2(PlanetaryComputer):
580
634
  if "cache_dir" in d:
581
635
  kwargs["cache_dir"] = join_upath(ds_path, d["cache_dir"])
582
636
 
583
- simple_optionals = ["harmonize", "query", "sort_by", "sort_ascending"]
637
+ simple_optionals = [
638
+ "harmonize",
639
+ "query",
640
+ "sort_by",
641
+ "sort_ascending",
642
+ "max_items_per_client",
643
+ ]
584
644
  for k in simple_optionals:
585
645
  if k in d:
586
646
  kwargs[k] = d[k]
@@ -756,7 +816,12 @@ class Sentinel1(PlanetaryComputer):
756
816
  if "cache_dir" in d:
757
817
  kwargs["cache_dir"] = join_upath(ds_path, d["cache_dir"])
758
818
 
759
- simple_optionals = ["query", "sort_by", "sort_ascending"]
819
+ simple_optionals = [
820
+ "query",
821
+ "sort_by",
822
+ "sort_ascending",
823
+ "max_items_per_client",
824
+ ]
760
825
  for k in simple_optionals:
761
826
  if k in d:
762
827
  kwargs[k] = d[k]
@@ -0,0 +1,130 @@
1
+ """This module contains dataclasses for summarizing the results of dataset operations.
2
+
3
+ They can be used by callers to emit telemetry / logs, or discarded.
4
+ """
5
+
6
+ from dataclasses import dataclass
7
+
8
+
9
+ @dataclass
10
+ class LayerPrepareSummary:
11
+ """Results for preparing a single layer."""
12
+
13
+ # Identity
14
+ layer_name: str
15
+ data_source_name: str
16
+
17
+ # Timing
18
+ duration_seconds: float
19
+
20
+ # Counts
21
+ windows_prepared: int
22
+ windows_skipped: int
23
+ get_items_attempts: int
24
+
25
+
26
+ @dataclass
27
+ class PrepareDatasetWindowsSummary:
28
+ """Results from prepare_dataset_windows operation for telemetry purposes."""
29
+
30
+ # Timing
31
+ duration_seconds: float
32
+
33
+ # Counts
34
+ total_windows_requested: int
35
+
36
+ # Per-layer summaries
37
+ layer_summaries: list[LayerPrepareSummary]
38
+
39
+
40
+ @dataclass
41
+ class IngestCounts:
42
+ """Known ingestion counts."""
43
+
44
+ items_ingested: int
45
+ geometries_ingested: int
46
+
47
+
48
+ @dataclass
49
+ class UnknownIngestCounts:
50
+ """Indicates ingestion counts are unknown due to partial failure."""
51
+
52
+ items_attempted: int
53
+ geometries_attempted: int
54
+
55
+
56
+ @dataclass
57
+ class LayerIngestSummary:
58
+ """Results for ingesting a single layer."""
59
+
60
+ # Identity
61
+ layer_name: str
62
+ data_source_name: str
63
+
64
+ # Timing
65
+ duration_seconds: float
66
+
67
+ # Counts - either known or unknown
68
+ ingest_counts: IngestCounts | UnknownIngestCounts
69
+ ingest_attempts: int
70
+
71
+
72
+ @dataclass
73
+ class IngestDatasetJobsSummary:
74
+ """Results from ingesting a set of jobs; for telemetry purposes."""
75
+
76
+ # Timing
77
+ duration_seconds: float
78
+
79
+ # Counts
80
+ num_jobs: int
81
+
82
+ # Per-layer summaries
83
+ layer_summaries: list[LayerIngestSummary]
84
+
85
+
86
+ @dataclass
87
+ class MaterializeWindowLayerSummary:
88
+ """Results for materializing a single window layer."""
89
+
90
+ skipped: bool
91
+ materialize_attempts: int
92
+
93
+
94
+ @dataclass
95
+ class MaterializeWindowLayersSummary:
96
+ """Results for materializing a given layer for all windows in a materialize call."""
97
+
98
+ # Identity
99
+ layer_name: str
100
+ data_source_name: str
101
+
102
+ # Timing
103
+ duration_seconds: float
104
+
105
+ # Counts
106
+ total_windows_requested: int
107
+ num_windows_materialized: int
108
+ materialize_attempts: int
109
+
110
+
111
+ @dataclass
112
+ class MaterializeDatasetWindowsSummary:
113
+ """Results from materialize_dataset_windows operation for telemetry purposes."""
114
+
115
+ # Timing
116
+ duration_seconds: float
117
+
118
+ # Counts
119
+ total_windows_requested: int
120
+
121
+ # Per-layer summaries
122
+ layer_summaries: list[MaterializeWindowLayersSummary]
123
+
124
+
125
+ @dataclass
126
+ class ErrorOutcome:
127
+ """TBD what goes in here, if anything."""
128
+
129
+ # Timing
130
+ duration_seconds: float