rslearn 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
 """Default TileStore implementation."""
 
+import json
 import math
 import shutil
 from typing import Any
@@ -35,6 +36,9 @@ from .tile_store import TileStore
 # Special filename to indicate writing is done.
 COMPLETED_FNAME = "completed"
 
+# Special filename to store the bands that are present in a raster.
+BANDS_FNAME = "bands.json"
+
 
 class DefaultTileStore(TileStore):
     """Default TileStore implementation.
@@ -84,7 +88,7 @@ class DefaultTileStore(TileStore):
         self.path = join_upath(ds_path, self.path_suffix)
 
     def _get_raster_dir(
-        self, layer_name: str, item_name: str, bands: list[str]
+        self, layer_name: str, item_name: str, bands: list[str], write: bool = False
     ) -> UPath:
         """Get the directory where the specified raster is stored.
 
@@ -92,12 +96,21 @@ class DefaultTileStore(TileStore):
             layer_name: the name of the dataset layer.
             item_name: the name of the item from the data source.
             bands: list of band names that are expected to be stored together.
+            write: whether to create the directory and write the bands to a file inside
+                the directory.
 
         Returns:
             the UPath directory where the raster should be stored.
         """
         assert self.path is not None
-        return self.path / layer_name / item_name / get_bandset_dirname(bands)
+        dir_name = self.path / layer_name / item_name / get_bandset_dirname(bands)
+
+        if write:
+            dir_name.mkdir(parents=True, exist_ok=True)
+            with (dir_name / BANDS_FNAME).open("w") as f:
+                json.dump(bands, f)
+
+        return dir_name
 
     def _get_raster_fname(
         self, layer_name: str, item_name: str, bands: list[str]
@@ -117,10 +130,12 @@ class DefaultTileStore(TileStore):
         """
         raster_dir = self._get_raster_dir(layer_name, item_name, bands)
         for fname in raster_dir.iterdir():
-            # Ignore completed sentinel files as well as temporary files created by
+            # Ignore completed sentinel files, bands files, as well as temporary files created by
             # open_atomic (in case this tile store is on local filesystem).
             if fname.name == COMPLETED_FNAME:
                 continue
+            if fname.name == BANDS_FNAME:
+                continue
             if ".tmp." in fname.name:
                 continue
             return fname
@@ -161,8 +176,20 @@ class DefaultTileStore(TileStore):
 
         bands: list[list[str]] = []
         for raster_dir in item_dir.iterdir():
-            parts = raster_dir.name.split("_")
-            bands.append(parts)
+            if not (raster_dir / BANDS_FNAME).exists():
+                # This is likely a legacy directory where the bands are only encoded in
+                # the directory name, so we have to rely on that.
+                parts = raster_dir.name.split("_")
+                bands.append(parts)
+                continue
+
+            # We use the BANDS_FNAME here -- although it is slower to read the file, it
+            # is more reliable since sometimes the directory name is a hash of the
+            # bands in case there are too many bands (filename too long) or some bands
+            # contain the underscore character.
+            with (raster_dir / BANDS_FNAME).open() as f:
+                bands.append(json.load(f))
+
         return bands
 
     def get_raster_bounds(
@@ -248,7 +275,7 @@ class DefaultTileStore(TileStore):
             bounds: the bounds of the array.
             array: the raster data.
         """
-        raster_dir = self._get_raster_dir(layer_name, item_name, bands)
+        raster_dir = self._get_raster_dir(layer_name, item_name, bands, write=True)
         raster_format = GeotiffRasterFormat(geotiff_options=self.geotiff_options)
         raster_format.encode_raster(raster_dir, projection, bounds, array)
         (raster_dir / COMPLETED_FNAME).touch()
@@ -264,7 +291,7 @@ class DefaultTileStore(TileStore):
             bands: the list of bands in the array.
             fname: the raster file, which must be readable by rasterio.
         """
-        raster_dir = self._get_raster_dir(layer_name, item_name, bands)
+        raster_dir = self._get_raster_dir(layer_name, item_name, bands, write=True)
         raster_dir.mkdir(parents=True, exist_ok=True)
 
         if self.convert_rasters_to_cogs:
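
To illustrate the new bands.json sidecar end to end, here is a minimal standalone sketch of the same write/read logic using plain pathlib and json (it is not the rslearn API; the directory layout and path below are made up for illustration):

    import json
    from pathlib import Path

    def read_bands(raster_dir: Path) -> list[str]:
        # Prefer the bands.json sidecar that newer writes create.
        bands_file = raster_dir / "bands.json"
        if bands_file.exists():
            return json.loads(bands_file.read_text())
        # Legacy directories only encode the bands in the directory name, which
        # breaks when a band name contains "_" or the name was hashed.
        return raster_dir.name.split("_")

    # Simulate a directory produced by a write with write=True.
    raster_dir = Path("/tmp/tile_store/layer/item/B02_B03_B04")
    raster_dir.mkdir(parents=True, exist_ok=True)
    (raster_dir / "bands.json").write_text(json.dumps(["B02", "B03", "B04"]))
    print(read_bands(raster_dir))  # ['B02', 'B03', 'B04']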
@@ -27,14 +27,18 @@ class Normalize(Transform):
 
         Args:
             mean: a single value or one mean per channel
-            std: a single value or one std per channel
+            std: a single value or one std per channel (must match the shape of mean)
             valid_range: optionally clip to a minimum and maximum value
             selectors: image items to transform
-            bands: optionally restrict the normalization to these bands
+            bands: optionally restrict the normalization to these band indices. If set,
+                mean and std must either be one value, or have length equal to the
+                number of band indices passed here.
             num_bands: the number of bands per image, to distinguish different images
                 in a time series. If set, then the bands list is repeated for each
                 image, e.g. if bands=[2] then we apply normalization on images[2],
-                images[2+num_bands], images[2+num_bands*2], etc.
+                images[2+num_bands], images[2+num_bands*2], etc. Or if the bands list
+                is not set, then we apply the mean and std on each image in the time
+                series.
         """
         super().__init__()
         self.mean = torch.tensor(mean)
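
Based only on the arguments documented above, a Normalize configured with a per-band mean/std restricted to three band indices of a four-band time series would look roughly like the following (the values and band layout are hypothetical, and the constructor is assumed to accept the documented arguments as keywords):

    # Hypothetical per-band normalization repeated across every timestep.
    transform = Normalize(
        mean=[1000.0, 900.0, 800.0],   # one value per entry in bands
        std=[500.0, 500.0, 400.0],
        bands=[0, 1, 2],               # band indices within each timestep
        num_bands=4,                   # channels per timestep in the stacked image
    )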
@@ -57,6 +61,23 @@ class Normalize(Transform):
         Args:
             image: the image to transform.
         """
+
+        def _repeat_mean_and_std(
+            image_channels: int, num_bands: int | None
+        ) -> tuple[torch.Tensor, torch.Tensor]:
+            """Get mean and std tensor that are suitable for applying on the image."""
+            # We only need to repeat the tensor if both of these are true:
+            # - The mean/std are not just one scalar.
+            # - self.num_bands is set, otherwise we treat the input as a single image.
+            if len(self.mean.shape) == 0:
+                return self.mean, self.std
+            if num_bands is None:
+                return self.mean, self.std
+            num_images = image_channels // num_bands
+            return self.mean.repeat(num_images)[:, None, None], self.std.repeat(
+                num_images
+            )[:, None, None]
+
         if self.bands is not None:
             # User has provided band indices to normalize.
             # If num_bands is set, then we repeat these for each image in the input
@@ -72,13 +93,21 @@ class Normalize(Transform):
                 dim=0,
             )
 
-            image[band_indices] = (image[band_indices] - self.mean) / self.std
+            # We use len(self.bands) here because that is how many bands per timestep
+            # we are actually processing with the mean/std.
+            mean, std = _repeat_mean_and_std(
+                image_channels=len(band_indices), num_bands=len(self.bands)
+            )
+            image[band_indices] = (image[band_indices] - mean) / std
             if self.valid_min is not None:
                 image[band_indices] = torch.clamp(
                     image[band_indices], min=self.valid_min, max=self.valid_max
                 )
         else:
-            image = (image - self.mean) / self.std
+            mean, std = _repeat_mean_and_std(
+                image_channels=image.shape[0], num_bands=self.num_bands
+            )
+            image = (image - mean) / std
             if self.valid_min is not None:
                 image = torch.clamp(image, min=self.valid_min, max=self.valid_max)
         return image
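
The repeat logic in _repeat_mean_and_std amounts to tiling a per-band mean/std across the timesteps stacked on the channel axis. A standalone torch sketch with made-up shapes and values:

    import torch

    num_bands = 3                      # bands per timestep
    image = torch.rand(6, 32, 32)      # 2 timesteps x 3 bands stacked on dim 0
    mean = torch.tensor([0.2, 0.3, 0.4])
    std = torch.tensor([0.1, 0.1, 0.2])

    num_images = image.shape[0] // num_bands
    mean_r = mean.repeat(num_images)[:, None, None]  # shape (6, 1, 1)
    std_r = std.repeat(num_images)[:, None, None]
    normalized = (image - mean_r) / std_r
    print(normalized.shape)  # torch.Size([6, 32, 32])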
@@ -0,0 +1,67 @@
+"""The SelectBands transform."""
+
+from typing import Any
+
+from .transform import Transform, read_selector, write_selector
+
+
+class SelectBands(Transform):
+    """Select a subset of bands from an image."""
+
+    def __init__(
+        self,
+        band_indices: list[int],
+        input_selector: str = "image",
+        output_selector: str = "image",
+        num_bands_per_timestep: int | None = None,
+    ):
+        """Initialize a new Concatenate.
+
+        Args:
+            band_indices: the bands to select.
+            input_selector: the selector to read the input image.
+            output_selector: the output selector under which to save the output image.
+            num_bands_per_timestep: the number of bands per image, to distinguish
+                between stacked images in an image time series. If set, then the
+                band_indices are selected for each image in the time series.
+        """
+        super().__init__()
+        self.input_selector = input_selector
+        self.output_selector = output_selector
+        self.band_indices = band_indices
+        self.num_bands_per_timestep = num_bands_per_timestep
+
+    def forward(
+        self, input_dict: dict[str, Any], target_dict: dict[str, Any]
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Apply concatenation over the inputs and targets.
+
+        Args:
+            input_dict: the input
+            target_dict: the target
+
+        Returns:
+            normalized (input_dicts, target_dicts) tuple
+        """
+        image = read_selector(input_dict, target_dict, self.input_selector)
+        num_bands_per_timestep = (
+            self.num_bands_per_timestep
+            if self.num_bands_per_timestep is not None
+            else image.shape[0]
+        )
+
+        if image.shape[0] % num_bands_per_timestep != 0:
+            raise ValueError(
+                f"channel dimension {image.shape[0]} is not multiple of bands per timestep {num_bands_per_timestep}"
+            )
+
+        # Copy the band indices for each timestep in the input.
+        wanted_bands: list[int] = []
+        for start_channel_idx in range(0, image.shape[0], num_bands_per_timestep):
+            wanted_bands.extend(
+                [(start_channel_idx + band_idx) for band_idx in self.band_indices]
+            )
+
+        result = image[wanted_bands]
+        write_selector(input_dict, target_dict, self.output_selector, result)
+        return input_dict, target_dict
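
The index expansion in SelectBands.forward can be checked with plain Python (the band counts below are hypothetical; the transform itself operates on the tensor read via input_selector):

    band_indices = [2, 1, 0]        # e.g. reorder a BGR-ordered stack to RGB
    num_bands_per_timestep = 4      # channels per timestep
    num_channels = 8                # two timesteps stacked on the channel axis

    wanted_bands = []
    for start in range(0, num_channels, num_bands_per_timestep):
        wanted_bands.extend(start + idx for idx in band_indices)
    print(wanted_bands)  # [2, 1, 0, 6, 5, 4]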
@@ -0,0 +1,60 @@
+"""Transforms related to Sentinel-1 data."""
+
+from typing import Any
+
+import torch
+
+from .transform import Transform
+
+
+class Sentinel1ToDecibels(Transform):
+    """Convert Sentinel-1 data from raw intensity to or from decibels."""
+
+    def __init__(
+        self,
+        selectors: list[str] = ["image"],
+        from_decibels: bool = False,
+        epsilon: float = 1e-6,
+    ):
+        """Initialize a new Sentinel1ToDecibels.
+
+        Args:
+            selectors: the input selectors to apply the transform on.
+            from_decibels: convert from decibels to intensities instead of intensity to
+                decibels.
+            epsilon: when converting to decibels, clip the intensities to this minimum
+                value to avoid log issues. This is mostly to avoid pixels that have no
+                data with no data value being 0.
+        """
+        super().__init__()
+        self.selectors = selectors
+        self.from_decibels = from_decibels
+        self.epsilon = epsilon
+
+    def apply_image(self, image: torch.Tensor) -> torch.Tensor:
+        """Normalize the specified image.
+
+        Args:
+            image: the image to transform.
+        """
+        if self.from_decibels:
+            # Decibels to linear scale.
+            return torch.pow(10.0, image / 10.0)
+        else:
+            # Linear scale to decibels.
+            return 10 * torch.log10(torch.clamp(image, min=self.epsilon))
+
+    def forward(
+        self, input_dict: dict[str, Any], target_dict: dict[str, Any]
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Apply normalization over the inputs and targets.
+
+        Args:
+            input_dict: the input
+            target_dict: the target
+
+        Returns:
+            normalized (input_dicts, target_dicts) tuple
+        """
+        self.apply_fn(self.apply_image, input_dict, target_dict, self.selectors)
+        return input_dict, target_dict
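
The conversion is the standard 10 * log10 mapping, with epsilon guarding zero-valued (nodata) pixels. A quick check of both directions with arbitrary values:

    import torch

    intensity = torch.tensor([0.0, 0.01, 1.0])
    db = 10 * torch.log10(torch.clamp(intensity, min=1e-6))
    print(db)    # approximately tensor([-60., -20., 0.]); the 0.0 pixel is clipped to epsilon

    back = torch.pow(10.0, db / 10.0)
    print(back)  # approximately tensor([1e-06, 1e-02, 1.0])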
@@ -54,7 +54,7 @@ def read_selector(
         the item specified by the selector
     """
     d, selector = get_dict_and_subselector(input_dict, target_dict, selector)
-    parts = selector.split("/")
+    parts = selector.split("/") if selector else []
     cur = d
     for part in parts:
         cur = cur[part]
@@ -76,11 +76,28 @@ def write_selector(
         v: the value to write
     """
     d, selector = get_dict_and_subselector(input_dict, target_dict, selector)
-    parts = selector.split("/")
-    cur = d
-    for part in parts[:-1]:
-        cur = cur[part]
-    cur[parts[-1]] = v
+    if selector:
+        parts = selector.split("/")
+        cur = d
+        for part in parts[:-1]:
+            cur = cur[part]
+        cur[parts[-1]] = v
+    else:
+        # If the selector references the input or target dictionary directly, then we
+        # have a special case where instead of overwriting with v, we replace the keys
+        # with those in v. v must be a dictionary here, not a tensor, since otherwise
+        # it wouldn't match the type of the input or target dictionary.
+        if not isinstance(v, dict):
+            raise ValueError(
+                "when directly specifying the input or target dict, expected the value to be a dict"
+            )
+        if d == v:
+            # This may happen if the writer did not make a copy of the dictionary. In
+            # this case the code below would not update d correctly since it would also
+            # clear v.
+            return
+        d.clear()
+        d.update(v)
 
 
 class Transform(torch.nn.Module):
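
The new empty-selector branch means that writing to the root of the input or target dict replaces its keys in place rather than rebinding the variable. A standalone sketch of the same dictionary logic, using plain dicts instead of the selector routing in get_dict_and_subselector:

    def write(d: dict, selector: str, v):
        if selector:
            parts = selector.split("/")
            cur = d
            for part in parts[:-1]:
                cur = cur[part]
            cur[parts[-1]] = v
        else:
            # Root write: v must itself be a dict and d is updated in place.
            assert isinstance(v, dict)
            if d == v:
                return  # same (or equal) dict passed in; clearing d would also clear v
            d.clear()
            d.update(v)

    input_dict = {"image": "old", "meta": {"id": 1}}
    write(input_dict, "meta/id", 2)
    write(input_dict, "", {"image": "new"})
    print(input_dict)  # {'image': 'new'}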
@@ -2,13 +2,13 @@
 
 import hashlib
 import json
-from typing import Any, BinaryIO
+from collections.abc import Callable
+from typing import Any, BinaryIO, TypeVar
 
 import affine
 import numpy as np
 import numpy.typing as npt
 import rasterio
-from class_registry import ClassRegistry
 from PIL import Image
 from rasterio.crs import CRS
 from rasterio.enums import Resampling
@@ -21,18 +21,44 @@ from rslearn.utils.fsspec import open_rasterio_upath_reader, open_rasterio_upath
 
 from .geometry import PixelBounds, Projection
 
-RasterFormats = ClassRegistry()
+_RasterFormatT = TypeVar("_RasterFormatT", bound="RasterFormat")
+
+
+class _RasterFormatRegistry(dict[str, type["RasterFormat"]]):
+    """Registry for RasterFormat classes."""
+
+    def register(
+        self, name: str
+    ) -> Callable[[type[_RasterFormatT]], type[_RasterFormatT]]:
+        """Decorator to register a raster format class."""
+
+        def decorator(cls: type[_RasterFormatT]) -> type[_RasterFormatT]:
+            self[name] = cls
+            return cls
+
+        return decorator
+
+
+RasterFormats = _RasterFormatRegistry()
+
+
 logger = get_logger(__name__)
 
 
 def get_bandset_dirname(bands: list[str]) -> str:
     """Get the directory name that should be used to store the given group of bands."""
+    # We try to use a human-readable name with underscore as the delimiter, but if that
+    # isn't straightforward then we use hash instead.
     if any(["_" in band for band in bands]):
-        raise ValueError("band names must not contain '_'")
+        # In this case we hash the JSON representation of the bands.
+        return hashlib.sha256(json.dumps(bands).encode()).hexdigest()
     dirname = "_".join(bands)
     if len(dirname) > 64:
         # Previously we simply joined the bands, but this can result in directory name
         # that is too long. In this case, now we use hash instead.
+        # We use a different code path here where we hash the initial directory name
+        # instead of the JSON, for historical reasons (to maintain backwards
+        # compatibility).
         dirname = hashlib.sha256(dirname.encode()).hexdigest()
     return dirname
 
@@ -141,6 +167,19 @@ class RasterFormat:
         """
         raise NotImplementedError
 
+    @staticmethod
+    def from_config(name: str, config: dict[str, Any]) -> "RasterFormat":
+        """Create a RasterFormat from a config dict.
+
+        Args:
+            name: the name of this format
+            config: the config dict
+
+        Returns:
+            the RasterFormat instance
+        """
+        raise NotImplementedError
+
 
 @RasterFormats.register("image_tile")
 class ImageTileRasterFormat(RasterFormat):
@@ -710,5 +749,5 @@ def load_raster_format(config: RasterFormatConfig) -> RasterFormat:
     Returns:
         the loaded RasterFormat implementation
     """
-    cls = RasterFormats.get_class(config.name)
+    cls = RasterFormats[config.name]
     return cls.from_config(config.name, config.config_dict)
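
With ClassRegistry gone, the registry is a small dict subclass: registration still uses a decorator, and lookup is plain indexing. A schematic example of how a custom format would plug in (the import path is assumed, and only from_config is overridden since that is the signature shown above):

    from typing import Any

    from rslearn.utils.raster_format import RasterFormat, RasterFormats  # assumed path


    @RasterFormats.register("my_format")
    class MyRasterFormat(RasterFormat):
        """Minimal stand-in used only to show the registration pattern."""

        @staticmethod
        def from_config(name: str, config: dict[str, Any]) -> "MyRasterFormat":
            return MyRasterFormat()


    # load_raster_format now resolves classes by indexing instead of get_class():
    cls = RasterFormats["my_format"]
    print(cls is MyRasterFormat)  # True

The vector format registry change below mirrors this exactly with VectorFormats and VectorFormat.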
@@ -1,11 +1,11 @@
 """Classes for writing vector data to a UPath."""
 
 import json
+from collections.abc import Callable
 from enum import Enum
-from typing import Any
+from typing import Any, TypeVar
 
 import shapely
-from class_registry import ClassRegistry
 from rasterio.crs import CRS
 from upath import UPath
 
@@ -18,7 +18,25 @@ from .feature import Feature
 from .geometry import PixelBounds, Projection, STGeometry, safely_reproject_and_clip
 
 logger = get_logger(__name__)
-VectorFormats = ClassRegistry()
+_VectorFormatT = TypeVar("_VectorFormatT", bound="VectorFormat")
+
+
+class _VectorFormatRegistry(dict[str, type["VectorFormat"]]):
+    """Registry for VectorFormat classes."""
+
+    def register(
+        self, name: str
+    ) -> Callable[[type[_VectorFormatT]], type[_VectorFormatT]]:
+        """Decorator to register a vector format class."""
+
+        def decorator(cls: type[_VectorFormatT]) -> type[_VectorFormatT]:
+            self[name] = cls
+            return cls
+
+        return decorator
+
+
+VectorFormats = _VectorFormatRegistry()
 
 
 class VectorFormat:
@@ -53,6 +71,19 @@ class VectorFormat:
         """
         raise NotImplementedError
 
+    @staticmethod
+    def from_config(name: str, config: dict[str, Any]) -> "VectorFormat":
+        """Create a VectorFormat from a config dict.
+
+        Args:
+            name: the name of this format
+            config: the config dict
+
+        Returns:
+            the VectorFormat instance
+        """
+        raise NotImplementedError
+
 
 @VectorFormats.register("tile")
 class TileVectorFormat(VectorFormat):
@@ -410,5 +441,5 @@ def load_vector_format(config: VectorFormatConfig) -> VectorFormat:
     Returns:
         the loaded VectorFormat implementation
     """
-    cls = VectorFormats.get_class(config.name)
+    cls = VectorFormats[config.name]
     return cls.from_config(config.name, config.config_dict)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rslearn
-Version: 0.0.7
+Version: 0.0.9
 Summary: A library for developing remote sensing datasets and models
 Author: OlmoEarth Team
 License: Apache License
@@ -212,7 +212,6 @@ Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: boto3>=1.39
-Requires-Dist: class_registry>=2.1
 Requires-Dist: fiona>=1.10
 Requires-Dist: fsspec>=2025.9.0
 Requires-Dist: jsonargparse>=4.35.0
@@ -233,7 +232,7 @@ Requires-Dist: cdsapi>=0.7.6; extra == "extra"
 Requires-Dist: earthdaily[platform]>=1.0.7; extra == "extra"
 Requires-Dist: earthengine-api>=1.6.3; extra == "extra"
 Requires-Dist: einops>=0.8; extra == "extra"
-Requires-Dist: gcsfs>=2025.9.0; extra == "extra"
+Requires-Dist: fsspec[gcs,s3]; extra == "extra"
 Requires-Dist: google-cloud-bigquery>=3.35; extra == "extra"
 Requires-Dist: google-cloud-storage>=2.18; extra == "extra"
 Requires-Dist: huggingface_hub>=0.34.4; extra == "extra"
@@ -244,7 +243,6 @@ Requires-Dist: planetary_computer>=1.0; extra == "extra"
 Requires-Dist: pycocotools>=2.0; extra == "extra"
 Requires-Dist: pystac_client>=0.9; extra == "extra"
 Requires-Dist: rtree>=1.4; extra == "extra"
-Requires-Dist: s3fs>=2025.9.0; extra == "extra"
 Requires-Dist: satlaspretrain_models>=0.3; extra == "extra"
 Requires-Dist: scipy>=1.16; extra == "extra"
 Requires-Dist: terratorch>=1.0.2; extra == "extra"
@@ -285,6 +283,7 @@ Quick links:
 - [Examples](docs/Examples.md) contains more examples, including customizing different
   stages of rslearn with additional code.
 - [DatasetConfig](docs/DatasetConfig.md) documents the dataset configuration file.
+- [ModelConfig](docs/ModelConfig.md) documents the model configuration file.
 
 
 Setup