rslearn 0.0.7__tar.gz → 0.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rslearn-0.0.7/rslearn.egg-info → rslearn-0.0.9}/PKG-INFO +3 -4
- {rslearn-0.0.7 → rslearn-0.0.9}/README.md +1 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/pyproject.toml +5 -5
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/local_files.py +20 -3
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/planetary_computer.py +79 -14
- rslearn-0.0.9/rslearn/dataset/handler_summaries.py +130 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/dataset/manage.py +159 -24
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/dataset/materialize.py +21 -2
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/dataset/remap.py +29 -4
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/main.py +60 -8
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/clay/clay.py +29 -14
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm.py +37 -25
- rslearn-0.0.9/rslearn/models/dinov3.py +166 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/galileo/galileo.py +58 -12
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/galileo/single_file_galileo.py +7 -1
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/presto/presto.py +11 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/prithvi.py +139 -52
- rslearn-0.0.9/rslearn/models/registry.py +22 -0
- rslearn-0.0.9/rslearn/models/resize_features.py +45 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/simple_time_series.py +65 -10
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/upsample.py +2 -2
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/tile_stores/default.py +34 -7
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/transforms/normalize.py +34 -5
- rslearn-0.0.9/rslearn/train/transforms/select_bands.py +67 -0
- rslearn-0.0.9/rslearn/train/transforms/sentinel1.py +60 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/transforms/transform.py +23 -6
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/raster_format.py +44 -5
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/vector_format.py +35 -4
- {rslearn-0.0.7 → rslearn-0.0.9/rslearn.egg-info}/PKG-INFO +3 -4
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn.egg-info/SOURCES.txt +5 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn.egg-info/requires.txt +1 -3
- rslearn-0.0.7/rslearn/models/registry.py +0 -5
- {rslearn-0.0.7 → rslearn-0.0.9}/LICENSE +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/__init__.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/arg_parser.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/config/__init__.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/config/dataset.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/const.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/__init__.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/aws_landsat.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/aws_open_data.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/aws_sentinel1.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/climate_data_store.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/copernicus.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/data_source.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/earthdaily.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/earthdata_srtm.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/eurocrops.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/gcp_public_data.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/geotiff.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/google_earth_engine.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/openstreetmap.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/planet.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/planet_basemap.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/raster_source.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/usda_cdl.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/usgs_landsat.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/utils.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/vector_source.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/worldcereal.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/worldcover.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/worldpop.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/data_sources/xyz_tiles.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/dataset/__init__.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/dataset/add_windows.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/dataset/dataset.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/dataset/index.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/dataset/window.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/log_utils.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/__init__.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/anysat.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/clay/configs/metadata.yaml +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/clip.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/conv.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm_src/__init__.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm_src/aurora/area.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm_src/aurora/fourier.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm_src/dynamic_hypernetwork.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm_src/flexivit/patch_embed.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm_src/flexivit/utils.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm_src/model_vit.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/copernicusfm_src/util/pos_embed.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/croma.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/detr/__init__.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/detr/box_ops.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/detr/detr.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/detr/matcher.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/detr/position_encoding.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/detr/transformer.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/detr/util.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/faster_rcnn.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/fpn.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/galileo/__init__.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/module_wrapper.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/molmo.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/multitask.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/drone.yaml +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/enmap.yaml +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/goes.yaml +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/himawari.yaml +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/intuition.yaml +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/landsat8.yaml +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/modis_terra.yaml +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/qb2_ge1.yaml +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/sentinel1.yaml +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/sentinel2.yaml +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/superdove.yaml +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/panopticon_data/sensors/wv23.yaml +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/pick_features.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/pooling_decoder.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/presto/__init__.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/presto/single_file_presto.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/sam2_enc.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/satlaspretrain.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/singletask.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/ssl4eo_s12.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/swin.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/task_embedding.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/terramind.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/trunk.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/unet.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/models/use_croma.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/py.typed +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/template_params.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/tile_stores/__init__.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/tile_stores/tile_store.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/__init__.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/callbacks/__init__.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/callbacks/adapters.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/callbacks/freeze_unfreeze.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/callbacks/gradients.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/callbacks/peft.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/data_module.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/dataset.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/lightning_module.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/optimizer.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/prediction_writer.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/scheduler.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/tasks/__init__.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/tasks/classification.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/tasks/detection.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/tasks/multi_task.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/tasks/per_pixel_regression.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/tasks/regression.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/tasks/segmentation.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/tasks/task.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/transforms/__init__.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/transforms/concatenate.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/transforms/crop.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/transforms/flip.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/transforms/mask.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/train/transforms/pad.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/__init__.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/array.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/feature.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/fsspec.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/geometry.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/get_utm_ups_crs.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/grid_index.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/jsonargparse.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/mp.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/rtree_index.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/spatial_index.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/sqlite_index.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn/utils/time.py +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn.egg-info/dependency_links.txt +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn.egg-info/entry_points.txt +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/rslearn.egg-info/top_level.txt +0 -0
- {rslearn-0.0.7 → rslearn-0.0.9}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rslearn
|
|
3
|
-
Version: 0.0.7
|
|
3
|
+
Version: 0.0.9
|
|
4
4
|
Summary: A library for developing remote sensing datasets and models
|
|
5
5
|
Author: OlmoEarth Team
|
|
6
6
|
License: Apache License
|
|
@@ -212,7 +212,6 @@ Requires-Python: >=3.11
|
|
|
212
212
|
Description-Content-Type: text/markdown
|
|
213
213
|
License-File: LICENSE
|
|
214
214
|
Requires-Dist: boto3>=1.39
|
|
215
|
-
Requires-Dist: class_registry>=2.1
|
|
216
215
|
Requires-Dist: fiona>=1.10
|
|
217
216
|
Requires-Dist: fsspec>=2025.9.0
|
|
218
217
|
Requires-Dist: jsonargparse>=4.35.0
|
|
@@ -233,7 +232,7 @@ Requires-Dist: cdsapi>=0.7.6; extra == "extra"
|
|
|
233
232
|
Requires-Dist: earthdaily[platform]>=1.0.7; extra == "extra"
|
|
234
233
|
Requires-Dist: earthengine-api>=1.6.3; extra == "extra"
|
|
235
234
|
Requires-Dist: einops>=0.8; extra == "extra"
|
|
236
|
-
Requires-Dist:
|
|
235
|
+
Requires-Dist: fsspec[gcs,s3]; extra == "extra"
|
|
237
236
|
Requires-Dist: google-cloud-bigquery>=3.35; extra == "extra"
|
|
238
237
|
Requires-Dist: google-cloud-storage>=2.18; extra == "extra"
|
|
239
238
|
Requires-Dist: huggingface_hub>=0.34.4; extra == "extra"
|
|
@@ -244,7 +243,6 @@ Requires-Dist: planetary_computer>=1.0; extra == "extra"
|
|
|
244
243
|
Requires-Dist: pycocotools>=2.0; extra == "extra"
|
|
245
244
|
Requires-Dist: pystac_client>=0.9; extra == "extra"
|
|
246
245
|
Requires-Dist: rtree>=1.4; extra == "extra"
|
|
247
|
-
Requires-Dist: s3fs>=2025.9.0; extra == "extra"
|
|
248
246
|
Requires-Dist: satlaspretrain_models>=0.3; extra == "extra"
|
|
249
247
|
Requires-Dist: scipy>=1.16; extra == "extra"
|
|
250
248
|
Requires-Dist: terratorch>=1.0.2; extra == "extra"
|
|
@@ -285,6 +283,7 @@ Quick links:
|
|
|
285
283
|
- [Examples](docs/Examples.md) contains more examples, including customizing different
|
|
286
284
|
stages of rslearn with additional code.
|
|
287
285
|
- [DatasetConfig](docs/DatasetConfig.md) documents the dataset configuration file.
|
|
286
|
+
- [ModelConfig](docs/ModelConfig.md) documents the model configuration file.
|
|
288
287
|
|
|
289
288
|
|
|
290
289
|
Setup
|
|
@@ -21,6 +21,7 @@ Quick links:
|
|
|
21
21
|
- [Examples](docs/Examples.md) contains more examples, including customizing different
|
|
22
22
|
stages of rslearn with additional code.
|
|
23
23
|
- [DatasetConfig](docs/DatasetConfig.md) documents the dataset configuration file.
|
|
24
|
+
- [ModelConfig](docs/ModelConfig.md) documents the model configuration file.
|
|
24
25
|
|
|
25
26
|
|
|
26
27
|
Setup
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "rslearn"
|
|
3
|
-
version = "0.0.7"
|
|
3
|
+
version = "0.0.9"
|
|
4
4
|
description = "A library for developing remote sensing datasets and models"
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "OlmoEarth Team" },
|
|
@@ -10,9 +10,8 @@ license = {file = "LICENSE"}
|
|
|
10
10
|
requires-python = ">=3.11"
|
|
11
11
|
dependencies = [
|
|
12
12
|
"boto3>=1.39",
|
|
13
|
-
"class_registry>=2.1",
|
|
14
13
|
"fiona>=1.10",
|
|
15
|
-
"fsspec>=2025.9.0",
|
|
14
|
+
"fsspec>=2025.9.0", # this is used both directly and indirectly (via universal_pathlib) in our code
|
|
16
15
|
"jsonargparse>=4.35.0",
|
|
17
16
|
"lightning>=2.5.1.post0",
|
|
18
17
|
"Pillow>=11.3",
|
|
@@ -35,7 +34,9 @@ extra = [
|
|
|
35
34
|
"earthdaily[platform]>=1.0.7",
|
|
36
35
|
"earthengine-api>=1.6.3",
|
|
37
36
|
"einops>=0.8",
|
|
38
|
-
|
|
37
|
+
# https://github.com/fsspec/universal_pathlib?tab=readme-ov-file#adding-universal_pathlib-to-your-project
|
|
38
|
+
# https://github.com/fsspec/filesystem_spec?tab=readme-ov-file#install
|
|
39
|
+
"fsspec[gcs, s3]", # for both direct use via fsspec and indirect use via universal_pathlib, docs suggest enabling specific backends like this
|
|
39
40
|
"google-cloud-bigquery>=3.35",
|
|
40
41
|
"google-cloud-storage>=2.18",
|
|
41
42
|
"huggingface_hub>=0.34.4",
|
|
@@ -46,7 +47,6 @@ extra = [
|
|
|
46
47
|
"pycocotools>=2.0",
|
|
47
48
|
"pystac_client>=0.9",
|
|
48
49
|
"rtree>=1.4",
|
|
49
|
-
"s3fs>=2025.9.0",
|
|
50
50
|
"satlaspretrain_models>=0.3",
|
|
51
51
|
"scipy>=1.16",
|
|
52
52
|
"terratorch>=1.0.2",
|
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
import functools
|
|
4
4
|
import json
|
|
5
|
+
from collections.abc import Callable
|
|
5
6
|
from typing import Any, Generic, TypeVar
|
|
6
7
|
|
|
7
8
|
import fiona
|
|
8
9
|
import shapely
|
|
9
10
|
import shapely.geometry
|
|
10
|
-
from class_registry import ClassRegistry
|
|
11
11
|
from rasterio.crs import CRS
|
|
12
12
|
from upath import UPath
|
|
13
13
|
|
|
@@ -23,7 +23,24 @@ from rslearn.utils.geometry import Projection, STGeometry, get_global_geometry
|
|
|
23
23
|
from .data_source import DataSource, Item, QueryConfig
|
|
24
24
|
|
|
25
25
|
logger = get_logger("__name__")
|
|
26
|
-
|
|
26
|
+
_ImporterT = TypeVar("_ImporterT", bound="Importer")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class _ImporterRegistry(dict[str, type["Importer"]]):
|
|
30
|
+
"""Registry for Importer classes."""
|
|
31
|
+
|
|
32
|
+
def register(self, name: str) -> Callable[[type[_ImporterT]], type[_ImporterT]]:
|
|
33
|
+
"""Decorator to register an importer class."""
|
|
34
|
+
|
|
35
|
+
def decorator(cls: type[_ImporterT]) -> type[_ImporterT]:
|
|
36
|
+
self[name] = cls
|
|
37
|
+
return cls
|
|
38
|
+
|
|
39
|
+
return decorator
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
Importers = _ImporterRegistry()
|
|
43
|
+
|
|
27
44
|
|
|
28
45
|
ItemType = TypeVar("ItemType", bound=Item)
|
|
29
46
|
LayerConfigType = TypeVar("LayerConfigType", bound=LayerConfig)
|
|
@@ -425,7 +442,7 @@ class LocalFiles(DataSource):
|
|
|
425
442
|
"""
|
|
426
443
|
self.config = config
|
|
427
444
|
|
|
428
|
-
self.importer = Importers[config.layer_type.value]
|
|
445
|
+
self.importer = Importers[config.layer_type.value]()
|
|
429
446
|
self.src_dir = src_dir
|
|
430
447
|
|
|
431
448
|
@staticmethod
|
|
@@ -83,6 +83,10 @@ class PlanetaryComputer(DataSource, TileStore):
|
|
|
83
83
|
|
|
84
84
|
STAC_ENDPOINT = "https://planetarycomputer.microsoft.com/api/stac/v1"
|
|
85
85
|
|
|
86
|
+
# Default threshold for recreating the STAC client to prevent memory leaks
|
|
87
|
+
# from the pystac Catalog's resolved objects cache growing unbounded
|
|
88
|
+
DEFAULT_MAX_ITEMS_PER_CLIENT = 1000
|
|
89
|
+
|
|
86
90
|
def __init__(
|
|
87
91
|
self,
|
|
88
92
|
collection_name: str,
|
|
@@ -93,6 +97,7 @@ class PlanetaryComputer(DataSource, TileStore):
|
|
|
93
97
|
timeout: timedelta = timedelta(seconds=10),
|
|
94
98
|
skip_items_missing_assets: bool = False,
|
|
95
99
|
cache_dir: UPath | None = None,
|
|
100
|
+
max_items_per_client: int | None = None,
|
|
96
101
|
):
|
|
97
102
|
"""Initialize a new PlanetaryComputer instance.
|
|
98
103
|
|
|
@@ -109,6 +114,9 @@ class PlanetaryComputer(DataSource, TileStore):
|
|
|
109
114
|
cache_dir: optional directory to cache items by name, including asset URLs.
|
|
110
115
|
If not set, there will be no cache and instead STAC requests will be
|
|
111
116
|
needed each time.
|
|
117
|
+
max_items_per_client: number of STAC items to process before recreating
|
|
118
|
+
the client to prevent memory leaks from the resolved objects cache.
|
|
119
|
+
Defaults to DEFAULT_MAX_ITEMS_PER_CLIENT.
|
|
112
120
|
"""
|
|
113
121
|
self.collection_name = collection_name
|
|
114
122
|
self.asset_bands = asset_bands
|
|
@@ -118,12 +126,15 @@ class PlanetaryComputer(DataSource, TileStore):
|
|
|
118
126
|
self.timeout = timeout
|
|
119
127
|
self.skip_items_missing_assets = skip_items_missing_assets
|
|
120
128
|
self.cache_dir = cache_dir
|
|
129
|
+
self.max_items_per_client = (
|
|
130
|
+
max_items_per_client or self.DEFAULT_MAX_ITEMS_PER_CLIENT
|
|
131
|
+
)
|
|
121
132
|
|
|
122
133
|
if self.cache_dir is not None:
|
|
123
134
|
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
|
124
135
|
|
|
125
136
|
self.client: pystac_client.Client | None = None
|
|
126
|
-
self.
|
|
137
|
+
self._client_item_count = 0
|
|
127
138
|
|
|
128
139
|
@staticmethod
|
|
129
140
|
def from_config(config: RasterLayerConfig, ds_path: UPath) -> "PlanetaryComputer":
|
|
@@ -142,7 +153,12 @@ class PlanetaryComputer(DataSource, TileStore):
|
|
|
142
153
|
if "cache_dir" in d:
|
|
143
154
|
kwargs["cache_dir"] = join_upath(ds_path, d["cache_dir"])
|
|
144
155
|
|
|
145
|
-
simple_optionals = [
|
|
156
|
+
simple_optionals = [
|
|
157
|
+
"query",
|
|
158
|
+
"sort_by",
|
|
159
|
+
"sort_ascending",
|
|
160
|
+
"max_items_per_client",
|
|
161
|
+
]
|
|
146
162
|
for k in simple_optionals:
|
|
147
163
|
if k in d:
|
|
148
164
|
kwargs[k] = d[k]
|
|
@@ -151,20 +167,40 @@ class PlanetaryComputer(DataSource, TileStore):
|
|
|
151
167
|
|
|
152
168
|
def _load_client(
|
|
153
169
|
self,
|
|
154
|
-
) ->
|
|
170
|
+
) -> pystac_client.Client:
|
|
155
171
|
"""Lazily load pystac client.
|
|
156
172
|
|
|
157
173
|
We don't load it when creating the data source because it takes time and caller
|
|
158
174
|
may not be calling get_items. Additionally, loading it during the get_items
|
|
159
175
|
call enables leveraging the retry loop functionality in
|
|
160
176
|
prepare_dataset_windows.
|
|
161
|
-
"""
|
|
162
|
-
if self.client is not None:
|
|
163
|
-
return self.client, self.collection
|
|
164
177
|
|
|
178
|
+
Note: We periodically recreate the client to prevent memory leaks from the
|
|
179
|
+
pystac Catalog's resolved objects cache, which grows unbounded as STAC items
|
|
180
|
+
are deserialized and cached. The cache cannot be cleared or disabled.
|
|
181
|
+
"""
|
|
182
|
+
if self.client is None:
|
|
183
|
+
logger.info("Creating initial STAC client")
|
|
184
|
+
self.client = pystac_client.Client.open(self.STAC_ENDPOINT)
|
|
185
|
+
return self.client
|
|
186
|
+
|
|
187
|
+
if self._client_item_count < self.max_items_per_client:
|
|
188
|
+
return self.client
|
|
189
|
+
|
|
190
|
+
# Recreate client to clear the resolved objects cache
|
|
191
|
+
current_client = self.client
|
|
192
|
+
logger.debug(
|
|
193
|
+
"Recreating STAC client after processing %d items (threshold: %d)",
|
|
194
|
+
self._client_item_count,
|
|
195
|
+
self.max_items_per_client,
|
|
196
|
+
)
|
|
197
|
+
client_root = current_client.get_root()
|
|
198
|
+
client_root.clear_links()
|
|
199
|
+
client_root.clear_items()
|
|
200
|
+
client_root.clear_children()
|
|
201
|
+
self._client_item_count = 0
|
|
165
202
|
self.client = pystac_client.Client.open(self.STAC_ENDPOINT)
|
|
166
|
-
|
|
167
|
-
return self.client, self.collection
|
|
203
|
+
return self.client
|
|
168
204
|
|
|
169
205
|
def _stac_item_to_item(self, stac_item: pystac.Item) -> PlanetaryComputerItem:
|
|
170
206
|
shp = shapely.geometry.shape(stac_item.geometry)
|
|
@@ -210,10 +246,26 @@ class PlanetaryComputer(DataSource, TileStore):
|
|
|
210
246
|
|
|
211
247
|
# No cache or not in cache, so we need to make the STAC request.
|
|
212
248
|
logger.debug("Getting STAC item {name}")
|
|
213
|
-
|
|
214
|
-
|
|
249
|
+
client = self._load_client()
|
|
250
|
+
|
|
251
|
+
search_result = client.search(ids=[name], collections=[self.collection_name])
|
|
252
|
+
stac_items = list(search_result.items())
|
|
253
|
+
|
|
254
|
+
if not stac_items:
|
|
255
|
+
raise ValueError(
|
|
256
|
+
f"Item {name} not found in collection {self.collection_name}"
|
|
257
|
+
)
|
|
258
|
+
if len(stac_items) > 1:
|
|
259
|
+
raise ValueError(
|
|
260
|
+
f"Multiple items found for ID {name} in collection {self.collection_name}"
|
|
261
|
+
)
|
|
262
|
+
|
|
263
|
+
stac_item = stac_items[0]
|
|
215
264
|
item = self._stac_item_to_item(stac_item)
|
|
216
265
|
|
|
266
|
+
# Track items processed for client recreation threshold (after deserialization)
|
|
267
|
+
self._client_item_count += 1
|
|
268
|
+
|
|
217
269
|
# Finally we cache it if cache_dir is set.
|
|
218
270
|
if cache_fname is not None:
|
|
219
271
|
with cache_fname.open("w") as f:
|
|
@@ -233,7 +285,7 @@ class PlanetaryComputer(DataSource, TileStore):
|
|
|
233
285
|
Returns:
|
|
234
286
|
List of groups of items that should be retrieved for each geometry.
|
|
235
287
|
"""
|
|
236
|
-
client
|
|
288
|
+
client = self._load_client()
|
|
237
289
|
|
|
238
290
|
groups = []
|
|
239
291
|
for geometry in geometries:
|
|
@@ -247,7 +299,9 @@ class PlanetaryComputer(DataSource, TileStore):
|
|
|
247
299
|
datetime=wgs84_geometry.time_range,
|
|
248
300
|
query=self.query,
|
|
249
301
|
)
|
|
250
|
-
stac_items = [item for item in result.
|
|
302
|
+
stac_items = [item for item in result.items()]
|
|
303
|
+
# Track items processed for client recreation threshold (after deserialization)
|
|
304
|
+
self._client_item_count += len(stac_items)
|
|
251
305
|
logger.debug("STAC search yielded %d items", len(stac_items))
|
|
252
306
|
|
|
253
307
|
if self.skip_items_missing_assets:
|
|
@@ -580,7 +634,13 @@ class Sentinel2(PlanetaryComputer):
|
|
|
580
634
|
if "cache_dir" in d:
|
|
581
635
|
kwargs["cache_dir"] = join_upath(ds_path, d["cache_dir"])
|
|
582
636
|
|
|
583
|
-
simple_optionals = [
|
|
637
|
+
simple_optionals = [
|
|
638
|
+
"harmonize",
|
|
639
|
+
"query",
|
|
640
|
+
"sort_by",
|
|
641
|
+
"sort_ascending",
|
|
642
|
+
"max_items_per_client",
|
|
643
|
+
]
|
|
584
644
|
for k in simple_optionals:
|
|
585
645
|
if k in d:
|
|
586
646
|
kwargs[k] = d[k]
|
|
@@ -756,7 +816,12 @@ class Sentinel1(PlanetaryComputer):
|
|
|
756
816
|
if "cache_dir" in d:
|
|
757
817
|
kwargs["cache_dir"] = join_upath(ds_path, d["cache_dir"])
|
|
758
818
|
|
|
759
|
-
simple_optionals = [
|
|
819
|
+
simple_optionals = [
|
|
820
|
+
"query",
|
|
821
|
+
"sort_by",
|
|
822
|
+
"sort_ascending",
|
|
823
|
+
"max_items_per_client",
|
|
824
|
+
]
|
|
760
825
|
for k in simple_optionals:
|
|
761
826
|
if k in d:
|
|
762
827
|
kwargs[k] = d[k]
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""This module contains dataclasses for summarizing the results of dataset operations.
|
|
2
|
+
|
|
3
|
+
They can be used by callers to emit telemetry / logs, or discarded.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
|
|
10
|
+
class LayerPrepareSummary:
|
|
11
|
+
"""Results for preparing a single layer."""
|
|
12
|
+
|
|
13
|
+
# Identity
|
|
14
|
+
layer_name: str
|
|
15
|
+
data_source_name: str
|
|
16
|
+
|
|
17
|
+
# Timing
|
|
18
|
+
duration_seconds: float
|
|
19
|
+
|
|
20
|
+
# Counts
|
|
21
|
+
windows_prepared: int
|
|
22
|
+
windows_skipped: int
|
|
23
|
+
get_items_attempts: int
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
|
|
27
|
+
class PrepareDatasetWindowsSummary:
|
|
28
|
+
"""Results from prepare_dataset_windows operation for telemetry purposes."""
|
|
29
|
+
|
|
30
|
+
# Timing
|
|
31
|
+
duration_seconds: float
|
|
32
|
+
|
|
33
|
+
# Counts
|
|
34
|
+
total_windows_requested: int
|
|
35
|
+
|
|
36
|
+
# Per-layer summaries
|
|
37
|
+
layer_summaries: list[LayerPrepareSummary]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
|
|
41
|
+
class IngestCounts:
|
|
42
|
+
"""Known ingestion counts."""
|
|
43
|
+
|
|
44
|
+
items_ingested: int
|
|
45
|
+
geometries_ingested: int
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
|
|
49
|
+
class UnknownIngestCounts:
|
|
50
|
+
"""Indicates ingestion counts are unknown due to partial failure."""
|
|
51
|
+
|
|
52
|
+
items_attempted: int
|
|
53
|
+
geometries_attempted: int
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass
|
|
57
|
+
class LayerIngestSummary:
|
|
58
|
+
"""Results for ingesting a single layer."""
|
|
59
|
+
|
|
60
|
+
# Identity
|
|
61
|
+
layer_name: str
|
|
62
|
+
data_source_name: str
|
|
63
|
+
|
|
64
|
+
# Timing
|
|
65
|
+
duration_seconds: float
|
|
66
|
+
|
|
67
|
+
# Counts - either known or unknown
|
|
68
|
+
ingest_counts: IngestCounts | UnknownIngestCounts
|
|
69
|
+
ingest_attempts: int
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass
|
|
73
|
+
class IngestDatasetJobsSummary:
|
|
74
|
+
"""Results from ingesting a set of jobs; for telemetry purposes."""
|
|
75
|
+
|
|
76
|
+
# Timing
|
|
77
|
+
duration_seconds: float
|
|
78
|
+
|
|
79
|
+
# Counts
|
|
80
|
+
num_jobs: int
|
|
81
|
+
|
|
82
|
+
# Per-layer summaries
|
|
83
|
+
layer_summaries: list[LayerIngestSummary]
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass
|
|
87
|
+
class MaterializeWindowLayerSummary:
|
|
88
|
+
"""Results for materializing a single window layer."""
|
|
89
|
+
|
|
90
|
+
skipped: bool
|
|
91
|
+
materialize_attempts: int
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@dataclass
|
|
95
|
+
class MaterializeWindowLayersSummary:
|
|
96
|
+
"""Results for materialize a given layer for all windows in a materialize call."""
|
|
97
|
+
|
|
98
|
+
# Identity
|
|
99
|
+
layer_name: str
|
|
100
|
+
data_source_name: str
|
|
101
|
+
|
|
102
|
+
# Timing
|
|
103
|
+
duration_seconds: float
|
|
104
|
+
|
|
105
|
+
# Counts
|
|
106
|
+
total_windows_requested: int
|
|
107
|
+
num_windows_materialized: int
|
|
108
|
+
materialize_attempts: int
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@dataclass
|
|
112
|
+
class MaterializeDatasetWindowsSummary:
|
|
113
|
+
"""Results from materialize_dataset_windows operation for telemetry purposes."""
|
|
114
|
+
|
|
115
|
+
# Timing
|
|
116
|
+
duration_seconds: float
|
|
117
|
+
|
|
118
|
+
# Counts
|
|
119
|
+
total_windows_requested: int
|
|
120
|
+
|
|
121
|
+
# Per-layer summaries
|
|
122
|
+
layer_summaries: list[MaterializeWindowLayersSummary]
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@dataclass
|
|
126
|
+
class ErrorOutcome:
|
|
127
|
+
"""TBD what goes in here, if anything."""
|
|
128
|
+
|
|
129
|
+
# Timing
|
|
130
|
+
duration_seconds: float
|