rslearn 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rslearn/data_sources/local_files.py +20 -3
- rslearn/data_sources/planetary_computer.py +79 -14
- rslearn/dataset/handler_summaries.py +130 -0
- rslearn/dataset/manage.py +159 -24
- rslearn/dataset/materialize.py +21 -2
- rslearn/dataset/remap.py +29 -4
- rslearn/main.py +60 -8
- rslearn/models/clay/clay.py +29 -14
- rslearn/models/copernicusfm.py +37 -25
- rslearn/models/dinov3.py +166 -0
- rslearn/models/galileo/galileo.py +58 -12
- rslearn/models/galileo/single_file_galileo.py +7 -1
- rslearn/models/presto/presto.py +11 -0
- rslearn/models/prithvi.py +139 -52
- rslearn/models/registry.py +19 -2
- rslearn/models/resize_features.py +45 -0
- rslearn/models/simple_time_series.py +65 -10
- rslearn/models/upsample.py +2 -2
- rslearn/tile_stores/default.py +34 -7
- rslearn/train/transforms/normalize.py +34 -5
- rslearn/train/transforms/select_bands.py +67 -0
- rslearn/train/transforms/sentinel1.py +60 -0
- rslearn/train/transforms/transform.py +23 -6
- rslearn/utils/raster_format.py +44 -5
- rslearn/utils/vector_format.py +35 -4
- {rslearn-0.0.7.dist-info → rslearn-0.0.9.dist-info}/METADATA +3 -4
- {rslearn-0.0.7.dist-info → rslearn-0.0.9.dist-info}/RECORD +31 -26
- {rslearn-0.0.7.dist-info → rslearn-0.0.9.dist-info}/WHEEL +0 -0
- {rslearn-0.0.7.dist-info → rslearn-0.0.9.dist-info}/entry_points.txt +0 -0
- {rslearn-0.0.7.dist-info → rslearn-0.0.9.dist-info}/licenses/LICENSE +0 -0
- {rslearn-0.0.7.dist-info → rslearn-0.0.9.dist-info}/top_level.txt +0 -0
rslearn/models/galileo/galileo.py
CHANGED

@@ -2,6 +2,7 @@
 
 import math
 import tempfile
+from contextlib import nullcontext
 from enum import StrEnum
 from typing import Any, cast
 

@@ -63,6 +64,11 @@ pretrained_weights: dict[GalileoSize, str] = {
 
 DEFAULT_NORMALIZER = Normalizer()
 
+AUTOCAST_DTYPE_MAP = {
+    "bfloat16": torch.bfloat16,
+    "float32": torch.float32,
+}
+
 
 class GalileoModel(nn.Module):
     """Galileo backbones."""

@@ -85,6 +91,7 @@ class GalileoModel(nn.Module):
         size: GalileoSize,
         patch_size: int = 4,
         pretrained_path: str | UPath | None = None,
+        autocast_dtype: str | None = "bfloat16",
     ) -> None:
         """Initialize the Galileo model.
 

@@ -93,6 +100,7 @@ class GalileoModel(nn.Module):
             patch_size: The patch size to use.
             pretrained_path: the local path to the pretrained weights. Otherwise it is
                 downloaded and cached in temp directory.
+            autocast_dtype: which dtype to use for autocasting, or set None to disable.
         """
         super().__init__()
         if pretrained_path is None:

@@ -128,8 +136,14 @@ class GalileoModel(nn.Module):
             idx for idx, key in enumerate(SPACE_TIME_BANDS_GROUPS_IDX) if "S1" in key
         ]
 
+        self.size = size
         self.patch_size = patch_size
 
+        if autocast_dtype is not None:
+            self.autocast_dtype = AUTOCAST_DTYPE_MAP[autocast_dtype]
+        else:
+            self.autocast_dtype = None
+
     @staticmethod
     def to_cartesian(
         lat: float | np.ndarray | torch.Tensor, lon: float | np.ndarray | torch.Tensor

@@ -484,18 +498,31 @@ class GalileoModel(nn.Module):
             patch_size = h
         else:
             patch_size = self.patch_size
-
-
-
-
-
-
-
-
-
-
-
-
+
+        # Decide context based on self.autocast_dtype.
+        device = galileo_input.s_t_x.device
+        if self.autocast_dtype is None:
+            context = nullcontext()
+        else:
+            assert device is not None
+            context = torch.amp.autocast(
+                device_type=device.type, dtype=self.autocast_dtype
+            )
+
+        with context:
+            outputs = self.model(
+                s_t_x=galileo_input.s_t_x,
+                s_t_m=galileo_input.s_t_m,
+                sp_x=galileo_input.sp_x,
+                sp_m=galileo_input.sp_m,
+                t_x=galileo_input.t_x,
+                t_m=galileo_input.t_m,
+                st_x=galileo_input.st_x,
+                st_m=galileo_input.st_m,
+                months=galileo_input.months,
+                patch_size=patch_size,
+            )
+
         if h == patch_size:
             # only one spatial patch, so we can just take an average
            # of all the tokens to output b c_g 1 1
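The hunk above maps a string argument to a torch dtype and falls back to contextlib.nullcontext() when autocasting is disabled. A minimal standalone sketch of the same pattern, assuming nothing beyond PyTorch itself (the function and variable names here are illustrative, not part of rslearn):

from contextlib import nullcontext

import torch

AUTOCAST_DTYPE_MAP = {"bfloat16": torch.bfloat16, "float32": torch.float32}


def run_with_optional_autocast(model, x, autocast_dtype: str | None = "bfloat16"):
    # Pick either a real autocast context or a no-op context.
    if autocast_dtype is None:
        context = nullcontext()
    else:
        context = torch.amp.autocast(
            device_type=x.device.type, dtype=AUTOCAST_DTYPE_MAP[autocast_dtype]
        )
    with context:
        return model(x)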
@@ -515,3 +542,22 @@ class GalileoModel(nn.Module):
                 "b h w c_g d -> b c_g d h w",
             ).mean(dim=1)
         ]
+
+    def get_backbone_channels(self) -> list:
+        """Returns the output channels of this model when used as a backbone.
+
+        The output channels is a list of (patch_size, depth) that corresponds
+        to the feature maps that the backbone returns.
+
+        Returns:
+            the output channels of the backbone as a list of (patch_size, depth) tuples.
+        """
+        if self.size == GalileoSize.BASE:
+            depth = 768
+        elif self.model_size == GalileoSize.TINY:
+            depth = 192
+        elif self.model_size == GalileoSize.NANO:
+            depth = 128
+        else:
+            raise ValueError(f"Invalid model size: {self.size}")
+        return [(self.patch_size, depth)]
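Several backbones in this release gain a get_backbone_channels() method that declares (downsample_factor, depth) tuples for the feature maps they return. A hedged sketch of how decoder-side code might consume that contract to size its layers; the helper name and the 1x1-conv design are illustrative assumptions, not rslearn API:

import torch


def build_projection_heads(backbone, out_channels: int = 64) -> torch.nn.ModuleList:
    # One 1x1 conv per declared feature map, sized from the backbone's depths.
    heads = []
    for _downsample_factor, depth in backbone.get_backbone_channels():
        heads.append(torch.nn.Conv2d(depth, out_channels, kernel_size=1))
    return torch.nn.ModuleList(heads)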
rslearn/models/galileo/single_file_galileo.py
CHANGED

@@ -1469,7 +1469,13 @@ class Encoder(GalileoBase):
             # we take the inverse of the mask because a value
             # of True indicates the value *should* take part in
             # attention
-
+            temp_mask = ~new_m.bool()
+            if temp_mask.all():
+                # if all the tokens are used in attention we can pass a None mask
+                # to the attention block
+                temp_mask = None
+
+            x = blk(x=x, y=None, attn_mask=temp_mask)
 
             if exit_ids_seq is not None:
                 assert exited_tokens is not None
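The change above replaces an all-True attention mask with None before calling the block, which typically lets PyTorch's scaled-dot-product-attention dispatch to its fused fast paths instead of applying a dense boolean mask. A small illustrative sketch of the same check (tensor and function names are made up):

import torch


def mask_or_none(pad_mask: torch.Tensor) -> torch.Tensor | None:
    # pad_mask: True where a token should be masked OUT of attention.
    keep = ~pad_mask.bool()  # True where the token should take part in attention
    if keep.all():
        # No token is masked, so skip the mask entirely.
        return None
    return keep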
rslearn/models/presto/presto.py
CHANGED
@@ -248,3 +248,14 @@ class Presto(nn.Module):
             output_features[batch_idx : batch_idx + self.pixel_batch_size] = output_b
 
         return [rearrange(output_features, "(b h w) d -> b d h w", h=h, w=w, b=b)]
+
+    def get_backbone_channels(self) -> list:
+        """Returns the output channels of this model when used as a backbone.
+
+        The output channels is a list of (patch_size, depth) that corresponds
+        to the feature maps that the backbone returns.
+
+        Returns:
+            the output channels of the backbone as a list of (patch_size, depth) tuples.
+        """
+        return [(1, 128)]
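Presto declares a single (1, 128) entry, i.e. full-resolution feature maps with 128 channels, which matches the rearrange in the forward pass shown above. A small illustrative check with made-up dimensions, using the same einops call:

import torch
from einops import rearrange

b, h, w, d = 2, 8, 8, 128
per_pixel = torch.randn(b * h * w, d)  # one 128-d feature per pixel
feat_map = rearrange(per_pixel, "(b h w) d -> b d h w", b=b, h=h, w=w)
assert feat_map.shape == (2, 128, 8, 8)  # consistent with the declared (1, 128)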
rslearn/models/prithvi.py
CHANGED
@@ -1,57 +1,91 @@
 """Prithvi V2."""
 
+import json
 import logging
 import tempfile
 import warnings
+from enum import StrEnum
+from pathlib import Path
 from typing import Any
 
 import numpy as np
 import torch
 import torch.nn as nn
-import yaml
 from einops import rearrange
 from huggingface_hub import hf_hub_download
 from timm.layers import to_2tuple
 from timm.models.vision_transformer import Block
 from torch.nn import functional as F
-
+
+from rslearn.train.transforms.normalize import Normalize
+from rslearn.train.transforms.transform import Transform
 
 logger = logging.getLogger(__name__)
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+class PrithviV2Models(StrEnum):
+    """Names for different Prithvi models on torch hub."""
+
+    VIT_300 = "VIT_300"
+    VIT_600 = "VIT_600"
+
+
+MODEL_TO_HF_INFO = {
+    PrithviV2Models.VIT_300: {
+        "hf_hub_id": "ibm-nasa-geospatial/Prithvi-EO-2.0-300M",
+        "weights": "Prithvi_EO_V2_300M.pt",
+        "revision": "b2f2520ab889f42a25c5361ba18761fcb4ea44ad",
+    },
+    PrithviV2Models.VIT_600: {
+        "hf_hub_id": "ibm-nasa-geospatial/Prithvi-EO-2.0-600M",
+        "weights": "Prithvi_EO_V2_600M.pt",
+        "revision": "87f15784813828dc37aa3197a143cd4689e4d080",
+    },
+}
+
+
+HF_HUB_CONFIG_FNAME = "config.json"
+DEFAULT_CACHE_DIR = Path(tempfile.gettempdir(), "rslearn_cache", "prithvi_v2")
 
 
-
+def get_config(cache_dir: Path, hf_hub_id: str, hf_hub_revision: str) -> dict[str, Any]:
+    """Get the JSON config dict.
+
+    Args:
+        cache_dir: the directory to cache the config.json file, which will be
+            downloaded from HF Hub.
+        hf_hub_id: the HF Hub ID from which to download the config.
+        hf_hub_revision: The revision (commit) to download the config from.
+    """
+    cache_fname = cache_dir / HF_HUB_CONFIG_FNAME
+    if not cache_fname.exists():
+        _ = hf_hub_download(
+            local_dir=cache_dir,
+            repo_id=hf_hub_id,
+            filename=HF_HUB_CONFIG_FNAME,
+            revision=hf_hub_revision,
+        )  # nosec
+    with cache_fname.open() as f:
+        return json.load(f)["pretrained_cfg"]
 
 
 class PrithviV2(nn.Module):
     """An Rslearn wrapper for Prithvi 2.0."""
 
-
+    INPUT_KEY = "image"
 
-    def __init__(
-
+    def __init__(
+        self,
+        cache_dir: str | Path | None = None,
+        size: PrithviV2Models = PrithviV2Models.VIT_300,
+        num_frames: int = 1,
+    ):
+        """Create a new PrithviV2.
 
-
-
-
+        Args:
+            cache_dir: The local folder in which to download the prithvi config and
+                weights. If None, it downloads to a temporary folder.
+            size: the model size, see class for various models.
             num_frames: The number of input frames (timesteps). The model was trained on 3,
                 but if there is just one timestamp examples use 1 (e.g.
                 https://github.com/NASA-IMPACT/Prithvi-EO-2.0/blob/main/examples/
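The new get_config helper caches config.json from the Hugging Face Hub and returns its pretrained_cfg section. A hedged usage sketch, assuming get_config is importable from rslearn.models.prithvi as laid out above; the cache path is arbitrary and the first call needs network access plus the huggingface_hub package:

from pathlib import Path

from rslearn.models.prithvi import get_config

# Hub ID and revision taken from MODEL_TO_HF_INFO above.
cfg = get_config(
    cache_dir=Path("/tmp/rslearn_cache/prithvi_v2"),
    hf_hub_id="ibm-nasa-geospatial/Prithvi-EO-2.0-300M",
    hf_hub_revision="b2f2520ab889f42a25c5361ba18761fcb4ea44ad",
)
print(cfg["mean"], cfg["std"])  # normalization stats later used by PrithviNormalize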
@@ -59,35 +93,28 @@ class PrithviV2(nn.Module):
 
         """
         super().__init__()
-        if
-
-
-        )
+        if cache_dir is None:
+            cache_dir = DEFAULT_CACHE_DIR
+        cache_dir = Path(cache_dir)
 
-
-
-
-            repo_id=HF_HUB_ID,
-            filename="config.json",
-            revision="b2f2520ab889f42a25c5361ba18761fcb4ea44ad",
-        )
-        with (UPath(pretrained_path) / "config.json").open("r") as f:
-            config = yaml.safe_load(f)["pretrained_cfg"]
+        hub_id = MODEL_TO_HF_INFO[size]["hf_hub_id"]
+        revision = MODEL_TO_HF_INFO[size]["revision"]
+        checkpoint_fname = MODEL_TO_HF_INFO[size]["weights"]
 
+        config = get_config(cache_dir, hub_id, revision)
         config["num_frames"] = num_frames
-
         self.model = PrithviMAE(**config)
 
-        if not (
+        if not (cache_dir / checkpoint_fname).exists():
             _ = hf_hub_download(
-                local_dir=
-                repo_id=
-                filename=
-                revision=
-            )
+                local_dir=cache_dir,
+                repo_id=hub_id,
+                filename=checkpoint_fname,
+                revision=revision,
+            )  # nosec
 
         state_dict = torch.load(
-
+            cache_dir / checkpoint_fname,
             map_location="cpu",
             weights_only=True,
         )

@@ -125,16 +152,15 @@ class PrithviV2(nn.Module):
     def forward(self, inputs: list[dict[str, Any]]) -> list[torch.Tensor]:
         """Compute feature maps from the Prithvi V2 backbone.
 
-
-        inputs: input dicts that must include "
-
-            this naming keeps the model consistent with other rslearn models.
+        Args:
+            inputs: input dicts that must include "image" key containing HLS
+                (Harmonized Landsat-Sentinel) data.
 
         Returns:
             11 feature maps (one per transformer block in the Prithvi model),
             of shape [B, H/p_s, W/p_s, D=1024] where p_s=16 is the patch size.
         """
-        x = torch.stack([inp[
+        x = torch.stack([inp[self.INPUT_KEY] for inp in inputs], dim=0)
         x = self._resize_data(x)
         num_timesteps = x.shape[1] // len(self.bands)
         x = rearrange(x, "b (t c) h w -> b c t h w", t=num_timesteps)
@@ -147,6 +173,67 @@ class PrithviV2(nn.Module):
             features, num_timesteps
         )
 
+    def get_backbone_channels(self) -> list:
+        """Returns the output channels of this model when used as a backbone.
+
+        The output channels is a list of (patch_size, depth) that corresponds
+        to the feature maps that the backbone returns.
+
+        Returns:
+            the output channels of the backbone as a list of (patch_size, depth) tuples.
+        """
+        return [(1, 1024)]
+
+
+class PrithviNormalize(Transform):
+    """Normalize inputs using Prithvi normalization.
+
+    Similar to the model, the input should be an image time series under the key
+    "image".
+    """
+
+    def __init__(
+        self,
+        cache_dir: str | Path | None = None,
+        size: PrithviV2Models = PrithviV2Models.VIT_300,
+    ) -> None:
+        """Initialize a new PrithviNormalize.
+
+        Args:
+            cache_dir: the local directory to cache the config.json which contains the
+                means and standard deviations used in the normalization.
+            size: the model size, see class for various models. In this case (and
+                for the current hf revision), the config values (mean and std) are the
+                same for both the 300M and 600M model, so its safe to not set this.
+        """
+        super().__init__()
+        hub_id = MODEL_TO_HF_INFO[size]["hf_hub_id"]
+        revision = MODEL_TO_HF_INFO[size]["revision"]
+        if cache_dir is None:
+            cache_dir = DEFAULT_CACHE_DIR
+        cache_dir = Path(cache_dir)
+        config = get_config(cache_dir, hub_id, revision)
+        self.normalizer = Normalize(
+            mean=config["mean"],
+            std=config["std"],
+            num_bands=len(config["mean"]),
+            selectors=[PrithviV2.INPUT_KEY],
+        )
+
+    def forward(
+        self, input_dict: dict[str, Any], target_dict: dict[str, Any]
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Apply Prithvi normalization on the image.
+
+        Args:
+            input_dict: the input, which must contain the "image" key.
+            target_dict: the target
+
+        Returns:
+            normalized (input_dicts, target_dicts) tuple
+        """
+        return self.normalizer(input_dict, target_dict)
+
 
 # Copyright (c) IBM Corp. 2024. All rights reserved.
 #
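Taken together, the new constructor arguments and the PrithviNormalize transform can be wired up roughly as below. This is a hedged sketch: the import path follows the file layout above, the cache directory is arbitrary, and the first call downloads the config and weights from the Hugging Face Hub.

from rslearn.models.prithvi import PrithviNormalize, PrithviV2, PrithviV2Models

# Share the cache directory so config.json is only fetched once.
cache_dir = "/tmp/rslearn_cache/prithvi_v2"
normalize = PrithviNormalize(cache_dir=cache_dir, size=PrithviV2Models.VIT_300)
model = PrithviV2(cache_dir=cache_dir, size=PrithviV2Models.VIT_300, num_frames=1)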
rslearn/models/registry.py
CHANGED
@@ -1,5 +1,22 @@
 """Model registry."""
 
-from
+from collections.abc import Callable
+from typing import Any, TypeVar
 
-
+_ModelT = TypeVar("_ModelT")
+
+
+class _ModelRegistry(dict[str, type[Any]]):
+    """Registry for Model classes."""
+
+    def register(self, name: str) -> Callable[[type[_ModelT]], type[_ModelT]]:
+        """Decorator to register a model class."""
+
+        def decorator(cls: type[_ModelT]) -> type[_ModelT]:
+            self[name] = cls
+            return cls
+
+        return decorator
+
+
+Models = _ModelRegistry()
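The registry is a plain dict subclass, so registration and lookup stay simple. A short hedged example of the decorator shown above; the model name and class are invented for illustration:

import torch

from rslearn.models.registry import Models


@Models.register("toy_backbone")
class ToyBackbone(torch.nn.Module):
    """A throwaway module, only here to show the decorator."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x


# Lookup works like any dict access and returns the registered class.
model_cls = Models["toy_backbone"]
assert model_cls is ToyBackbone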
rslearn/models/resize_features.py
ADDED

@@ -0,0 +1,45 @@
+"""The ResizeFeatures module."""
+
+import torch
+
+
+class ResizeFeatures(torch.nn.Module):
+    """Resize input features to new sizes."""
+
+    def __init__(
+        self,
+        out_sizes: list[tuple[int, int]],
+        mode: str = "bilinear",
+    ):
+        """Initialize a ResizeFeatures.
+
+        Args:
+            out_sizes: the output sizes of the feature maps. There must be one entry
+                for each input feature map.
+            mode: mode to pass to torch.nn.Upsample, e.g. "bilinear" (default) or
+                "nearest".
+        """
+        super().__init__()
+        layers = []
+        for size in out_sizes:
+            layers.append(
+                torch.nn.Upsample(
+                    size=size,
+                    mode=mode,
+                )
+            )
+        self.layers = torch.nn.ModuleList(layers)
+
+    def forward(
+        self, features: list[torch.Tensor], inputs: list[torch.Tensor]
+    ) -> list[torch.Tensor]:
+        """Resize the input feature maps to new sizes.
+
+        Args:
+            features: list of feature maps at different resolutions.
+            inputs: original inputs (ignored).
+
+        Returns:
+            resized feature maps
+        """
+        return [self.layers[idx](feat_map) for idx, feat_map in enumerate(features)]
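A quick hedged sketch of the new module in isolation; the sizes and tensor shapes below are arbitrary:

import torch

from rslearn.models.resize_features import ResizeFeatures

# Two feature maps in, two target sizes out (one Upsample layer per map).
resize = ResizeFeatures(out_sizes=[(64, 64), (32, 32)], mode="bilinear")
features = [torch.randn(2, 96, 16, 16), torch.randn(2, 192, 8, 8)]
resized = resize(features, inputs=[])
print([f.shape for f in resized])  # [2, 96, 64, 64] and [2, 192, 32, 32]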
rslearn/models/simple_time_series.py
CHANGED

@@ -20,12 +20,13 @@ class SimpleTimeSeries(torch.nn.Module):
     def __init__(
         self,
         encoder: torch.nn.Module,
-        image_channels: int,
+        image_channels: int | None = None,
         op: str = "max",
         groups: list[list[int]] | None = None,
         num_layers: int | None = None,
         image_key: str = "image",
         backbone_channels: list[tuple[int, int]] | None = None,
+        image_keys: dict[str, int] | None = None,
     ) -> None:
         """Create a new SimpleTimeSeries.
 

@@ -48,13 +49,25 @@ class SimpleTimeSeries(torch.nn.Module):
             image_key: the key to access the images.
             backbone_channels: manually specify the backbone channels. Can be set if
                 the encoder does not provide get_backbone_channels function.
+            image_keys: as an alternative to setting image_channels, map from the key
+                in input dict to the number of channels per timestep for that modality.
+                This way SimpleTimeSeries can be used with multimodal inputs. One of
+                image_channels or image_keys must be specified.
         """
+        if (image_channels is None and image_keys is None) or (
+            image_channels is not None and image_keys is not None
+        ):
+            raise ValueError(
+                "exactly one of image_channels and image_keys must be specified"
+            )
+
         super().__init__()
         self.encoder = encoder
         self.image_channels = image_channels
         self.op = op
         self.groups = groups
         self.image_key = image_key
+        self.image_keys = image_keys
 
         if backbone_channels is not None:
             out_channels = backbone_channels
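image_keys describes how many channels each modality contributes per timestep, so the wrapper can recover the timestep axis from channel-stacked inputs. A hedged sketch of what a per-sample input dict looks like in that mode; the modality keys and band counts are invented, not rslearn defaults:

import torch

# Four timesteps of a 9-band and a 2-band modality, channels stacked per sample.
input_dict = {
    "sentinel2": torch.randn(4 * 9, 64, 64),
    "sentinel1": torch.randn(4 * 2, 64, 64),
}
image_keys = {"sentinel2": 9, "sentinel1": 2}
# Every modality must resolve to the same number of timesteps, otherwise the
# forward pass below raises ValueError.
for key, channels_per_timestep in image_keys.items():
    assert input_dict[key].shape[0] // channels_per_timestep == 4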
@@ -144,6 +157,26 @@ class SimpleTimeSeries(torch.nn.Module):
             out_channels.append((downsample_factor, depth * self.num_groups))
         return out_channels
 
+    def _get_batched_images(
+        self, input_dicts: list[dict[str, Any]], image_key: str, image_channels: int
+    ) -> torch.Tensor:
+        """Collect and reshape images across input dicts.
+
+        The BTCHW image time series are reshaped to (B*T)CHW so they can be passed to
+        the forward pass of a per-image (unitemporal) model.
+        """
+        images = torch.stack(
+            [input_dict[image_key] for input_dict in input_dicts], dim=0
+        )
+        n_batch = images.shape[0]
+        n_images = images.shape[1] // image_channels
+        n_height = images.shape[2]
+        n_width = images.shape[3]
+        batched_images = images.reshape(
+            n_batch * n_images, image_channels, n_height, n_width
+        )
+        return batched_images
+
     def forward(
         self,
         inputs: list[dict[str, Any]],
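_get_batched_images folds the timestep axis into the batch axis so a per-image encoder can process every timestep independently. A minimal reproduction of that reshape with made-up dimensions:

import torch

batch, timesteps, channels, height, width = 2, 4, 9, 64, 64
# Each sample stores its time series with channels stacked: (T*C, H, W).
samples = [torch.randn(timesteps * channels, height, width) for _ in range(batch)]
images = torch.stack(samples, dim=0)  # (B, T*C, H, W)
batched = images.reshape(batch * timesteps, channels, height, width)  # (B*T, C, H, W)
assert batched.shape == (8, 9, 64, 64)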
@@ -156,15 +189,37 @@ class SimpleTimeSeries(torch.nn.Module):
         """
         # First get features of each image.
         # To do so, we need to split up each grouped image into its component images (which have had their channels stacked).
-
-        n_batch =
-        n_images
-
-
-
-
-
-
+        batched_inputs: list[dict[str, Any]] | None = None
+        n_batch = len(inputs)
+        n_images: int | None = None
+
+        if self.image_keys is not None:
+            for image_key, image_channels in self.image_keys.items():
+                batched_images = self._get_batched_images(
+                    inputs, image_key, image_channels
+                )
+
+                if batched_inputs is None:
+                    batched_inputs = [{} for _ in batched_images]
+                    n_images = batched_images.shape[0] // n_batch
+                elif n_images != batched_images.shape[0] // n_batch:
+                    raise ValueError(
+                        "expected all modalities to have the same number of timesteps"
+                    )
+
+                for i, image in enumerate(batched_images):
+                    batched_inputs[i][image_key] = image
+
+        else:
+            assert self.image_channels is not None
+            batched_images = self._get_batched_images(
+                inputs, self.image_key, self.image_channels
+            )
+            batched_inputs = [{self.image_key: image} for image in batched_images]
+            n_images = batched_images.shape[0] // n_batch
+
+        assert n_images is not None
+
         all_features = [
             feat_map.reshape(
                 n_batch,
rslearn/models/upsample.py
CHANGED
@@ -23,13 +23,13 @@ class Upsample(torch.nn.Module):
     def forward(
         self, features: list[torch.Tensor], inputs: list[torch.Tensor]
     ) -> list[torch.Tensor]:
-        """
+        """Upsample each feature map.
 
         Args:
             features: list of feature maps at different resolutions.
             inputs: original inputs (ignored).
 
         Returns:
-
+            upsampled feature maps
         """
         return [self.layer(feat_map) for feat_map in features]