rslearn 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rslearn/arg_parser.py +2 -9
- rslearn/config/dataset.py +15 -16
- rslearn/dataset/dataset.py +28 -22
- rslearn/lightning_cli.py +22 -11
- rslearn/main.py +1 -1
- rslearn/models/anysat.py +35 -33
- rslearn/models/attention_pooling.py +177 -0
- rslearn/models/clip.py +5 -2
- rslearn/models/component.py +12 -0
- rslearn/models/croma.py +11 -3
- rslearn/models/dinov3.py +2 -1
- rslearn/models/faster_rcnn.py +2 -1
- rslearn/models/galileo/galileo.py +58 -31
- rslearn/models/module_wrapper.py +6 -1
- rslearn/models/molmo.py +4 -2
- rslearn/models/olmoearth_pretrain/model.py +206 -51
- rslearn/models/olmoearth_pretrain/norm.py +5 -3
- rslearn/models/panopticon.py +3 -1
- rslearn/models/presto/presto.py +45 -15
- rslearn/models/prithvi.py +9 -7
- rslearn/models/sam2_enc.py +3 -1
- rslearn/models/satlaspretrain.py +4 -1
- rslearn/models/simple_time_series.py +43 -17
- rslearn/models/ssl4eo_s12.py +19 -14
- rslearn/models/swin.py +3 -1
- rslearn/models/terramind.py +5 -4
- rslearn/train/all_patches_dataset.py +96 -28
- rslearn/train/dataset.py +102 -53
- rslearn/train/model_context.py +35 -1
- rslearn/train/scheduler.py +15 -0
- rslearn/train/tasks/classification.py +8 -2
- rslearn/train/tasks/detection.py +3 -2
- rslearn/train/tasks/multi_task.py +2 -3
- rslearn/train/tasks/per_pixel_regression.py +14 -5
- rslearn/train/tasks/regression.py +8 -2
- rslearn/train/tasks/segmentation.py +13 -4
- rslearn/train/tasks/task.py +2 -2
- rslearn/train/transforms/concatenate.py +45 -5
- rslearn/train/transforms/crop.py +22 -8
- rslearn/train/transforms/flip.py +13 -5
- rslearn/train/transforms/mask.py +11 -2
- rslearn/train/transforms/normalize.py +46 -15
- rslearn/train/transforms/pad.py +15 -3
- rslearn/train/transforms/resize.py +83 -0
- rslearn/train/transforms/select_bands.py +11 -2
- rslearn/train/transforms/sentinel1.py +18 -3
- rslearn/utils/geometry.py +73 -0
- rslearn/utils/jsonargparse.py +66 -0
- {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/METADATA +1 -1
- {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/RECORD +55 -53
- {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/WHEEL +0 -0
- {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/entry_points.txt +0 -0
- {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/licenses/LICENSE +0 -0
- {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/licenses/NOTICE +0 -0
- {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/top_level.txt +0 -0
rslearn/train/transforms/crop.py
CHANGED
@@ -5,6 +5,8 @@ from typing import Any
 import torch
 import torchvision
 
+from rslearn.train.model_context import RasterImage
+
 from .transform import Transform, read_selector
 
 
@@ -69,7 +71,9 @@ class Crop(Transform):
             "remove_from_top": remove_from_top,
         }
 
-    def apply_image(
+    def apply_image(
+        self, image: RasterImage | torch.Tensor, state: dict[str, Any]
+    ) -> RasterImage | torch.Tensor:
         """Apply the sampled state on the specified image.
 
         Args:
@@ -80,13 +84,23 @@ class Crop(Transform):
         crop_size = state["crop_size"] * image.shape[-1] // image_shape[1]
         remove_from_left = state["remove_from_left"] * image.shape[-1] // image_shape[1]
         remove_from_top = state["remove_from_top"] * image.shape[-2] // image_shape[0]
-
-        image
-
-
-
-
-
+        if isinstance(image, RasterImage):
+            image.image = torchvision.transforms.functional.crop(
+                image.image,
+                top=remove_from_top,
+                left=remove_from_left,
+                height=crop_size,
+                width=crop_size,
+            )
+        else:
+            image = torchvision.transforms.functional.crop(
+                image,
+                top=remove_from_top,
+                left=remove_from_left,
+                height=crop_size,
+                width=crop_size,
+            )
+        return image
 
     def apply_boxes(self, boxes: Any, state: dict[str, bool]) -> torch.Tensor:
         """Apply the sampled state on the specified image.
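Note: the isinstance(image, RasterImage) branch above is the pattern this release applies across all the transforms below: RasterImage stores its pixel data in an .image tensor attribute, so transforms mutate that attribute in place, while plain tensors are replaced and returned. A minimal sketch of the dispatch, assuming only the .image attribute visible in this diff (the rest of the RasterImage API is not shown here):

    import torch
    import torchvision

    def crop_any(image, top: int, left: int, size: int):
        # Unwrap RasterImage (which stores pixels in .image) or use the tensor directly.
        target = image.image if hasattr(image, "image") else image
        cropped = torchvision.transforms.functional.crop(
            target, top=top, left=left, height=size, width=size
        )
        if hasattr(image, "image"):
            image.image = cropped  # mutate the wrapper in place, as Crop.apply_image does
            return image
        return cropped  # plain tensors are returned directly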
rslearn/train/transforms/flip.py
CHANGED
@@ -4,6 +4,8 @@ from typing import Any
 
 import torch
 
+from rslearn.train.model_context import RasterImage
+
 from .transform import Transform
 
 
@@ -48,17 +50,23 @@ class Flip(Transform):
             "vertical": vertical,
         }
 
-    def apply_image(self, image:
+    def apply_image(self, image: RasterImage, state: dict[str, bool]) -> RasterImage:
         """Apply the sampled state on the specified image.
 
         Args:
             image: the image to transform.
             state: the sampled state.
         """
-        if
-
-
-
+        if isinstance(image, RasterImage):
+            if state["horizontal"]:
+                image.image = torch.flip(image.image, dims=[-1])
+            if state["vertical"]:
+                image.image = torch.flip(image.image, dims=[-2])
+        elif isinstance(image, torch.Tensor):
+            if state["horizontal"]:
+                image = torch.flip(image, dims=[-1])
+            if state["vertical"]:
+                image = torch.flip(image, dims=[-2])
         return image
 
     def apply_boxes(
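The negative dims passed to torch.flip are what let the same call handle both CHW tensors and the CTHW tensors stored in RasterImage.image: dims=[-1] always flips width and dims=[-2] always flips height, regardless of how many leading axes there are. A quick illustration:

    import torch

    x = torch.tensor([[[1, 2, 3],
                       [4, 5, 6]]])      # CHW with C=1, H=2, W=3
    torch.flip(x, dims=[-1])             # horizontal: [[[3, 2, 1], [6, 5, 4]]]
    torch.flip(x, dims=[-2])             # vertical:   [[[4, 5, 6], [1, 2, 3]]]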
rslearn/train/transforms/mask.py
CHANGED
@@ -2,6 +2,7 @@
 
 import torch
 
+from rslearn.train.model_context import RasterImage
 from rslearn.train.transforms.transform import Transform, read_selector
 
 
@@ -31,7 +32,9 @@ class Mask(Transform):
         self.mask_selector = mask_selector
         self.mask_value = mask_value
 
-    def apply_image(
+    def apply_image(
+        self, image: torch.Tensor | RasterImage, mask: torch.Tensor | RasterImage
+    ) -> torch.Tensor | RasterImage:
         """Apply the mask on the image.
 
         Args:
@@ -42,6 +45,9 @@ class Mask(Transform):
            masked image
         """
         # Tile the mask to have same number of bands as the image.
+        if isinstance(mask, RasterImage):
+            mask = mask.image
+
         if image.shape[0] != mask.shape[0]:
             if mask.shape[0] != 1:
                 raise ValueError(
@@ -49,7 +55,10 @@ class Mask(Transform):
                 )
             mask = mask.repeat(image.shape[0], 1, 1)
 
-        image
+        if isinstance(image, torch.Tensor):
+            image[mask == 0] = self.mask_value
+        else:
+            image.image[mask == 0] = self.mask_value
         return image
 
     def forward(self, input_dict: dict, target_dict: dict) -> tuple[dict, dict]:
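For reference, the tiling-plus-indexing logic in apply_image amounts to broadcasting a single-band mask across all image bands and overwriting masked pixels. A small sketch (the mask_value of -1.0 is illustrative, not a default taken from this diff):

    import torch

    image = torch.full((3, 2, 2), 7.0)        # 3-band image
    mask = torch.tensor([[[1, 0], [0, 1]]])   # single-band mask
    mask = mask.repeat(image.shape[0], 1, 1)  # tile to match the band count
    image[mask == 0] = -1.0                   # apply mask_value where mask is zero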
rslearn/train/transforms/normalize.py
CHANGED

@@ -4,6 +4,8 @@ from typing import Any
 
 import torch
 
+from rslearn.train.model_context import RasterImage
+
 from .transform import Transform
 
 
@@ -55,7 +57,9 @@ class Normalize(Transform):
         self.bands = torch.tensor(bands) if bands is not None else None
         self.num_bands = num_bands
 
-    def apply_image(
+    def apply_image(
+        self, image: torch.Tensor | RasterImage
+    ) -> torch.Tensor | RasterImage:
         """Normalize the specified image.
 
         Args:
@@ -63,7 +67,7 @@ class Normalize(Transform):
         """
 
         def _repeat_mean_and_std(
-            image_channels: int, num_bands: int | None
+            image_channels: int, num_bands: int | None, is_raster_image: bool
         ) -> tuple[torch.Tensor, torch.Tensor]:
             """Get mean and std tensor that are suitable for applying on the image."""
             # We only need to repeat the tensor if both of these are true:
@@ -74,9 +78,16 @@ class Normalize(Transform):
             if num_bands is None:
                 return self.mean, self.std
             num_images = image_channels // num_bands
-
-
-
+            if is_raster_image:
+                # add an extra T dimension, CTHW
+                return self.mean.repeat(num_images)[
+                    :, None, None, None
+                ], self.std.repeat(num_images)[:, None, None, None]
+            else:
+                # add an extra T dimension, CTHW
+                return self.mean.repeat(num_images)[:, None, None], self.std.repeat(
+                    num_images
+                )[:, None, None]
 
         if self.bands is not None:
             # User has provided band indices to normalize.
@@ -96,20 +107,40 @@ class Normalize(Transform):
             # We use len(self.bands) here because that is how many bands per timestep
             # we are actually processing with the mean/std.
             mean, std = _repeat_mean_and_std(
-                image_channels=len(band_indices),
+                image_channels=len(band_indices),
+                num_bands=len(self.bands),
+                is_raster_image=isinstance(image, RasterImage),
             )
-
-
-
-            image[band_indices]
-
+            if isinstance(image, torch.Tensor):
+                image[band_indices] = (image[band_indices] - mean) / std
+                if self.valid_min is not None:
+                    image[band_indices] = torch.clamp(
+                        image[band_indices], min=self.valid_min, max=self.valid_max
+                    )
+            else:
+                image.image[band_indices] = (image.image[band_indices] - mean) / std
+                if self.valid_min is not None:
+                    image.image[band_indices] = torch.clamp(
+                        image.image[band_indices],
+                        min=self.valid_min,
+                        max=self.valid_max,
+                    )
         else:
             mean, std = _repeat_mean_and_std(
-                image_channels=image.shape[0],
+                image_channels=image.shape[0],
+                num_bands=self.num_bands,
+                is_raster_image=isinstance(image, RasterImage),
             )
-
-
-
+            if isinstance(image, torch.Tensor):
+                image = (image - mean) / std
+                if self.valid_min is not None:
+                    image = torch.clamp(image, min=self.valid_min, max=self.valid_max)
+            else:
+                image.image = (image.image - mean) / std
+                if self.valid_min is not None:
+                    image.image = torch.clamp(
+                        image.image, min=self.valid_min, max=self.valid_max
+                    )
         return image
 
     def forward(
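The new is_raster_image flag exists because RasterImage stores a CTHW tensor, so the per-band mean/std need one more trailing singleton axis than for a plain CHW tensor in order to broadcast. A shape-only sketch:

    import torch

    mean = torch.tensor([10.0, 20.0])       # per-band means, C=2
    chw = torch.zeros(2, 4, 4)              # plain tensor input
    cthw = torch.zeros(2, 3, 4, 4)          # RasterImage.image with T=3

    chw - mean[:, None, None]               # broadcasts over H, W
    cthw - mean[:, None, None, None]        # broadcasts over T, H, W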
rslearn/train/transforms/pad.py
CHANGED
@@ -5,6 +5,8 @@ from typing import Any
 import torch
 import torchvision
 
+from rslearn.train.model_context import RasterImage
+
 from .transform import Transform
 
 
@@ -48,7 +50,9 @@ class Pad(Transform):
         """
         return {"size": torch.randint(low=self.size[0], high=self.size[1], size=())}
 
-    def apply_image(
+    def apply_image(
+        self, image: RasterImage | torch.Tensor, state: dict[str, bool]
+    ) -> RasterImage | torch.Tensor:
         """Apply the sampled state on the specified image.
 
         Args:
@@ -101,8 +105,16 @@ class Pad(Transform):
         horizontal_pad = (horizontal_half, horizontal_extra - horizontal_half)
         vertical_pad = (vertical_half, vertical_extra - vertical_half)
 
-
-
+        if isinstance(image, RasterImage):
+            image.image = apply_padding(
+                image.image, True, horizontal_pad[0], horizontal_pad[1]
+            )
+            image.image = apply_padding(
+                image.image, False, vertical_pad[0], vertical_pad[1]
+            )
+        else:
+            image = apply_padding(image, True, horizontal_pad[0], horizontal_pad[1])
+            image = apply_padding(image, False, vertical_pad[0], vertical_pad[1])
         return image
 
     def apply_boxes(self, boxes: Any, state: dict[str, bool]) -> torch.Tensor:
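The apply_padding helper itself is unchanged and not shown in this hunk. The context lines split each extra amount into a (before, after) pair; assuming the *_half values are computed as extra // 2 (that code sits outside the hunk), odd extras put the spare pixel at the end:

    # Assumed reconstruction of the pad split; only the (half, extra - half)
    # pairing is visible in the diff.
    def split_pad(extra: int) -> tuple[int, int]:
        half = extra // 2
        return (half, extra - half)

    split_pad(5)  # (2, 3)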
rslearn/train/transforms/resize.py
ADDED

@@ -0,0 +1,83 @@
+"""Resize transform."""
+
+from typing import Any
+
+import torch
+import torchvision
+from torchvision.transforms import InterpolationMode
+
+from rslearn.train.model_context import RasterImage
+
+from .transform import Transform
+
+INTERPOLATION_MODES = {
+    "nearest": InterpolationMode.NEAREST,
+    "nearest_exact": InterpolationMode.NEAREST_EXACT,
+    "bilinear": InterpolationMode.BILINEAR,
+    "bicubic": InterpolationMode.BICUBIC,
+}
+
+
+class Resize(Transform):
+    """Resizes inputs to a target size."""
+
+    def __init__(
+        self,
+        target_size: tuple[int, int],
+        selectors: list[str] = [],
+        interpolation: str = "nearest",
+    ):
+        """Initialize a resize transform.
+
+        Args:
+            target_size: the (height, width) to resize to.
+            selectors: items to transform.
+            interpolation: the interpolation mode to use for resizing.
+                Must be one of "nearest", "nearest_exact", "bilinear", or "bicubic".
+        """
+        super().__init__()
+        self.target_size = target_size
+        self.selectors = selectors
+        self.interpolation = INTERPOLATION_MODES[interpolation]
+
+    def apply_resize(
+        self, image: torch.Tensor | RasterImage
+    ) -> torch.Tensor | RasterImage:
+        """Apply resizing on the specified image.
+
+        If the image is 2D, it is unsqueezed to 3D and then squeezed
+        back after resizing.
+
+        Args:
+            image: the image to transform.
+        """
+        if isinstance(image, torch.Tensor):
+            if image.dim() == 2:
+                image = image.unsqueeze(0)  # (H, W) -> (1, H, W)
+                result = torchvision.transforms.functional.resize(
+                    image, self.target_size, self.interpolation
+                )
+                return result.squeeze(0)  # (1, H, W) -> (H, W)
+            return torchvision.transforms.functional.resize(
+                image, self.target_size, self.interpolation
+            )
+        else:
+            image.image = torchvision.transforms.functional.resize(
+                image.image, self.target_size, self.interpolation
+            )
+            return image
+
+    def forward(
+        self, input_dict: dict[str, Any], target_dict: dict[str, Any]
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Apply transform over the inputs and targets.
+
+        Args:
+            input_dict: the input
+            target_dict: the target
+
+        Returns:
+            transformed (input_dicts, target_dicts) tuple
+        """
+        self.apply_fn(self.apply_resize, input_dict, target_dict, self.selectors)
+        return input_dict, target_dict
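A minimal usage sketch for the new transform; the "image" selector string and the dict contents are assumptions about how inputs are keyed, not something this diff shows:

    from rslearn.train.transforms.resize import Resize

    resize = Resize(
        target_size=(256, 256),
        selectors=["image"],       # hypothetical selector key
        interpolation="bilinear",
    )
    input_dict, target_dict = resize.forward(input_dict, target_dict)

2D inputs are unsqueezed to (1, H, W) before torchvision's resize and squeezed back afterward, so single-band label rasters keep their original rank.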
rslearn/train/transforms/select_bands.py
CHANGED

@@ -2,6 +2,8 @@
 
 from typing import Any
 
+from rslearn.train.model_context import RasterImage
+
 from .transform import Transform, read_selector, write_selector
 
 
@@ -49,6 +51,10 @@ class SelectBands(Transform):
             if self.num_bands_per_timestep is not None
             else image.shape[0]
         )
+        if isinstance(image, RasterImage):
+            assert num_bands_per_timestep == image.shape[0], (
+                "Expect a seperate dimension for timesteps in RasterImages."
+            )
 
         if image.shape[0] % num_bands_per_timestep != 0:
             raise ValueError(
@@ -62,6 +68,9 @@ class SelectBands(Transform):
                 [(start_channel_idx + band_idx) for band_idx in self.band_indices]
             )
 
-
-
+        if isinstance(image, RasterImage):
+            image.image = image.image[wanted_bands]
+        else:
+            image = image[wanted_bands]
+        write_selector(input_dict, target_dict, self.output_selector, image)
         return input_dict, target_dict
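The wanted_bands construction visible in the context line repeats the selected band indices once per timestep offset for stacked CHW inputs. The surrounding loop is not shown in this hunk, so the following is an assumed reconstruction of the indexing it produces:

    band_indices = [0, 2]          # bands to keep within each timestep
    num_bands_per_timestep = 4
    num_timesteps = 2
    wanted_bands = []
    for t in range(num_timesteps):
        start_channel_idx = t * num_bands_per_timestep
        wanted_bands.extend(start_channel_idx + b for b in band_indices)
    # wanted_bands == [0, 2, 4, 6]; image[wanted_bands] then selects those channels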
rslearn/train/transforms/sentinel1.py
CHANGED

@@ -4,6 +4,8 @@ from typing import Any
 
 import torch
 
+from rslearn.train.model_context import RasterImage
+
 from .transform import Transform
 
 
@@ -31,18 +33,31 @@ class Sentinel1ToDecibels(Transform):
         self.from_decibels = from_decibels
         self.epsilon = epsilon
 
-    def apply_image(
+    def apply_image(
+        self, image: torch.Tensor | RasterImage
+    ) -> torch.Tensor | RasterImage:
         """Normalize the specified image.
 
         Args:
             image: the image to transform.
         """
+        if isinstance(image, torch.Tensor):
+            image_to_process = image
+        else:
+            image_to_process = image.image
         if self.from_decibels:
             # Decibels to linear scale.
-
+            image_to_process = torch.pow(10.0, image_to_process / 10.0)
         else:
             # Linear scale to decibels.
-
+            image_to_process = 10 * torch.log10(
+                torch.clamp(image_to_process, min=self.epsilon)
+            )
+        if isinstance(image, torch.Tensor):
+            return image_to_process
+        else:
+            image.image = image_to_process
+            return image
 
     def forward(
         self, input_dict: dict[str, Any], target_dict: dict[str, Any]
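The two branches are the standard decibel conversions for Sentinel-1 backscatter, with epsilon guarding log10 against zeros; they round-trip exactly for values above epsilon:

    import torch

    eps = 1e-6                      # illustrative; the default epsilon is not shown in this hunk
    linear = torch.tensor([0.5, 1.0, 2.0])
    db = 10 * torch.log10(torch.clamp(linear, min=eps))  # linear -> decibels
    back = torch.pow(10.0, db / 10.0)                    # decibels -> linear
    assert torch.allclose(back, linear)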
rslearn/utils/geometry.py
CHANGED
@@ -116,6 +116,79 @@ class Projection:
 WGS84_PROJECTION = Projection(CRS.from_epsg(WGS84_EPSG), 1, 1)
 
 
+class ResolutionFactor:
+    """Multiplier for the resolution in a Projection.
+
+    The multiplier is either an integer x, or the inverse of an integer (1/x).
+
+    Factors greater than 1 increase the projection_units/pixel resolution, increasing
+    the resolution (more pixels per projection unit). Factors less than 1 make it coarser
+    (less pixels).
+    """
+
+    def __init__(self, numerator: int = 1, denominator: int = 1):
+        """Create a new ResolutionFactor.
+
+        Args:
+            numerator: the numerator of the fraction.
+            denominator: the denominator of the fraction. If set, numerator must be 1.
+        """
+        if numerator != 1 and denominator != 1:
+            raise ValueError("one of numerator or denominator must be 1")
+        if not isinstance(numerator, int) or not isinstance(denominator, int):
+            raise ValueError("numerator and denominator must be integers")
+        if numerator < 1 or denominator < 1:
+            raise ValueError("numerator and denominator must be >= 1")
+        self.numerator = numerator
+        self.denominator = denominator
+
+    def multiply_projection(self, projection: Projection) -> Projection:
+        """Multiply the projection by this factor."""
+        if self.denominator > 1:
+            return Projection(
+                projection.crs,
+                projection.x_resolution * self.denominator,
+                projection.y_resolution * self.denominator,
+            )
+        else:
+            return Projection(
+                projection.crs,
+                projection.x_resolution // self.numerator,
+                projection.y_resolution // self.numerator,
+            )
+
+    def multiply_bounds(self, bounds: PixelBounds) -> PixelBounds:
+        """Multiply the bounds by this factor.
+
+        When coarsening, the width and height of the given bounds must be a multiple of
+        the denominator.
+        """
+        if self.denominator > 1:
+            # Verify the width and height are multiples of the denominator.
+            # Otherwise the new width and height is not an integer.
+            width = bounds[2] - bounds[0]
+            height = bounds[3] - bounds[1]
+            if width % self.denominator != 0 or height % self.denominator != 0:
+                raise ValueError(
+                    f"width {width} or height {height} is not a multiple of the resolution factor {self.denominator}"
+                )
+            # TODO: an offset could be introduced by bounds not being a multiple
+            # of the denominator -> will need to decide how to handle that.
+            return (
+                bounds[0] // self.denominator,
+                bounds[1] // self.denominator,
+                bounds[2] // self.denominator,
+                bounds[3] // self.denominator,
+            )
+        else:
+            return (
+                bounds[0] * self.numerator,
+                bounds[1] * self.numerator,
+                bounds[2] * self.numerator,
+                bounds[3] * self.numerator,
+            )
+
+
 class STGeometry:
     """A spatiotemporal geometry.
 
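A worked example of the two directions, using illustrative resolutions (Projection takes crs, x_resolution, y_resolution, as in the WGS84_PROJECTION context line above):

    from rasterio.crs import CRS
    from rslearn.utils.geometry import Projection, ResolutionFactor

    proj = Projection(CRS.from_epsg(3857), 10, -10)   # 10 units/pixel, illustrative

    half = ResolutionFactor(denominator=2)            # factor 1/2: coarsen
    half.multiply_projection(proj)                    # resolutions become 20, -20
    half.multiply_bounds((0, 0, 256, 256))            # (0, 0, 128, 128)

    double = ResolutionFactor(numerator=2)            # factor 2: finer
    double.multiply_bounds((0, 0, 256, 256))          # (0, 0, 512, 512)

    half.multiply_bounds((0, 0, 255, 255))            # raises ValueError: 255 is not a multiple of 2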
rslearn/utils/jsonargparse.py
CHANGED
@@ -8,6 +8,7 @@ from rasterio.crs import CRS
 from upath import UPath
 
 from rslearn.config.dataset import LayerConfig
+from rslearn.utils.geometry import ResolutionFactor
 
 if TYPE_CHECKING:
     from rslearn.data_sources.data_source import DataSourceContext
@@ -91,6 +92,68 @@ def data_source_context_deserializer(v: dict[str, Any]) -> "DataSourceContext":
     )
 
 
+def resolution_factor_serializer(v: ResolutionFactor) -> str:
+    """Serialize ResolutionFactor for jsonargparse.
+
+    Args:
+        v: the ResolutionFactor object.
+
+    Returns:
+        the ResolutionFactor encoded to string
+    """
+    if hasattr(v, "init_args"):
+        init_args = v.init_args
+        return f"{init_args.numerator}/{init_args.denominator}"
+
+    return f"{v.numerator}/{v.denominator}"
+
+
+def resolution_factor_deserializer(v: int | str | dict) -> ResolutionFactor:
+    """Deserialize ResolutionFactor for jsonargparse.
+
+    Args:
+        v: the encoded ResolutionFactor.
+
+    Returns:
+        the decoded ResolutionFactor object
+    """
+    # Handle already-instantiated ResolutionFactor
+    if isinstance(v, ResolutionFactor):
+        return v
+
+    # Handle Namespace from class_path syntax (used during config save/validation)
+    if hasattr(v, "init_args"):
+        init_args = v.init_args
+        return ResolutionFactor(
+            numerator=init_args.numerator,
+            denominator=init_args.denominator,
+        )
+
+    # Handle dict from class_path syntax in YAML config
+    if isinstance(v, dict) and "init_args" in v:
+        init_args = v["init_args"]
+        return ResolutionFactor(
+            numerator=init_args.get("numerator", 1),
+            denominator=init_args.get("denominator", 1),
+        )
+
+    if isinstance(v, int):
+        return ResolutionFactor(numerator=v)
+    elif isinstance(v, str):
+        parts = v.split("/")
+        if len(parts) == 1:
+            return ResolutionFactor(numerator=int(parts[0]))
+        elif len(parts) == 2:
+            return ResolutionFactor(
+                numerator=int(parts[0]),
+                denominator=int(parts[1]),
+            )
+        else:
+            raise ValueError("expected resolution factor to be of the form x or 1/x")
+    else:
+        raise ValueError("expected resolution factor to be str or int")
+
+
 def init_jsonargparse() -> None:
     """Initialize custom jsonargparse serializers."""
     global INITIALIZED
@@ -100,6 +163,9 @@ def init_jsonargparse() -> None:
     jsonargparse.typing.register_type(
         datetime, datetime_serializer, datetime_deserializer
     )
+    jsonargparse.typing.register_type(
+        ResolutionFactor, resolution_factor_serializer, resolution_factor_deserializer
+    )
 
     from rslearn.data_sources.data_source import DataSourceContext
 
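The deserializer accepts the shorthand forms alongside jsonargparse's class_path machinery, so a config field typed as ResolutionFactor can be written as a bare integer or an "x/y" string. A sketch of the accepted inputs:

    from rslearn.utils.geometry import ResolutionFactor
    from rslearn.utils.jsonargparse import (
        resolution_factor_deserializer,
        resolution_factor_serializer,
    )

    resolution_factor_deserializer(2)        # ResolutionFactor(numerator=2)
    resolution_factor_deserializer("1/4")    # ResolutionFactor(numerator=1, denominator=4)
    resolution_factor_deserializer({"init_args": {"denominator": 4}})  # class_path dict form
    resolution_factor_serializer(ResolutionFactor(numerator=2))        # "2/1"
    resolution_factor_deserializer("1/2/3")  # raises ValueError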