rslearn 0.0.1__py3-none-any.whl → 0.0.21__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (166)
  1. rslearn/arg_parser.py +31 -0
  2. rslearn/config/__init__.py +6 -12
  3. rslearn/config/dataset.py +520 -401
  4. rslearn/const.py +9 -15
  5. rslearn/data_sources/__init__.py +8 -23
  6. rslearn/data_sources/aws_landsat.py +242 -98
  7. rslearn/data_sources/aws_open_data.py +111 -151
  8. rslearn/data_sources/aws_sentinel1.py +131 -0
  9. rslearn/data_sources/climate_data_store.py +471 -0
  10. rslearn/data_sources/copernicus.py +884 -12
  11. rslearn/data_sources/data_source.py +43 -12
  12. rslearn/data_sources/earthdaily.py +484 -0
  13. rslearn/data_sources/earthdata_srtm.py +282 -0
  14. rslearn/data_sources/eurocrops.py +242 -0
  15. rslearn/data_sources/gcp_public_data.py +578 -222
  16. rslearn/data_sources/google_earth_engine.py +461 -135
  17. rslearn/data_sources/local_files.py +219 -150
  18. rslearn/data_sources/openstreetmap.py +51 -89
  19. rslearn/data_sources/planet.py +24 -60
  20. rslearn/data_sources/planet_basemap.py +275 -0
  21. rslearn/data_sources/planetary_computer.py +798 -0
  22. rslearn/data_sources/usda_cdl.py +195 -0
  23. rslearn/data_sources/usgs_landsat.py +115 -83
  24. rslearn/data_sources/utils.py +249 -61
  25. rslearn/data_sources/vector_source.py +1 -0
  26. rslearn/data_sources/worldcereal.py +449 -0
  27. rslearn/data_sources/worldcover.py +144 -0
  28. rslearn/data_sources/worldpop.py +153 -0
  29. rslearn/data_sources/xyz_tiles.py +150 -107
  30. rslearn/dataset/__init__.py +8 -2
  31. rslearn/dataset/add_windows.py +2 -2
  32. rslearn/dataset/dataset.py +40 -51
  33. rslearn/dataset/handler_summaries.py +131 -0
  34. rslearn/dataset/manage.py +313 -74
  35. rslearn/dataset/materialize.py +431 -107
  36. rslearn/dataset/remap.py +29 -4
  37. rslearn/dataset/storage/__init__.py +1 -0
  38. rslearn/dataset/storage/file.py +202 -0
  39. rslearn/dataset/storage/storage.py +140 -0
  40. rslearn/dataset/window.py +181 -44
  41. rslearn/lightning_cli.py +454 -0
  42. rslearn/log_utils.py +24 -0
  43. rslearn/main.py +384 -181
  44. rslearn/models/anysat.py +215 -0
  45. rslearn/models/attention_pooling.py +177 -0
  46. rslearn/models/clay/clay.py +231 -0
  47. rslearn/models/clay/configs/metadata.yaml +295 -0
  48. rslearn/models/clip.py +68 -0
  49. rslearn/models/component.py +111 -0
  50. rslearn/models/concatenate_features.py +103 -0
  51. rslearn/models/conv.py +63 -0
  52. rslearn/models/croma.py +306 -0
  53. rslearn/models/detr/__init__.py +5 -0
  54. rslearn/models/detr/box_ops.py +103 -0
  55. rslearn/models/detr/detr.py +504 -0
  56. rslearn/models/detr/matcher.py +107 -0
  57. rslearn/models/detr/position_encoding.py +114 -0
  58. rslearn/models/detr/transformer.py +429 -0
  59. rslearn/models/detr/util.py +24 -0
  60. rslearn/models/dinov3.py +177 -0
  61. rslearn/models/faster_rcnn.py +30 -28
  62. rslearn/models/feature_center_crop.py +53 -0
  63. rslearn/models/fpn.py +19 -8
  64. rslearn/models/galileo/__init__.py +5 -0
  65. rslearn/models/galileo/galileo.py +595 -0
  66. rslearn/models/galileo/single_file_galileo.py +1678 -0
  67. rslearn/models/module_wrapper.py +65 -0
  68. rslearn/models/molmo.py +69 -0
  69. rslearn/models/multitask.py +384 -28
  70. rslearn/models/olmoearth_pretrain/__init__.py +1 -0
  71. rslearn/models/olmoearth_pretrain/model.py +421 -0
  72. rslearn/models/olmoearth_pretrain/norm.py +86 -0
  73. rslearn/models/panopticon.py +170 -0
  74. rslearn/models/panopticon_data/sensors/drone.yaml +32 -0
  75. rslearn/models/panopticon_data/sensors/enmap.yaml +904 -0
  76. rslearn/models/panopticon_data/sensors/goes.yaml +9 -0
  77. rslearn/models/panopticon_data/sensors/himawari.yaml +9 -0
  78. rslearn/models/panopticon_data/sensors/intuition.yaml +606 -0
  79. rslearn/models/panopticon_data/sensors/landsat8.yaml +84 -0
  80. rslearn/models/panopticon_data/sensors/modis_terra.yaml +99 -0
  81. rslearn/models/panopticon_data/sensors/qb2_ge1.yaml +34 -0
  82. rslearn/models/panopticon_data/sensors/sentinel1.yaml +85 -0
  83. rslearn/models/panopticon_data/sensors/sentinel2.yaml +97 -0
  84. rslearn/models/panopticon_data/sensors/superdove.yaml +60 -0
  85. rslearn/models/panopticon_data/sensors/wv23.yaml +63 -0
  86. rslearn/models/pick_features.py +17 -10
  87. rslearn/models/pooling_decoder.py +60 -7
  88. rslearn/models/presto/__init__.py +5 -0
  89. rslearn/models/presto/presto.py +297 -0
  90. rslearn/models/presto/single_file_presto.py +926 -0
  91. rslearn/models/prithvi.py +1147 -0
  92. rslearn/models/resize_features.py +59 -0
  93. rslearn/models/sam2_enc.py +13 -9
  94. rslearn/models/satlaspretrain.py +38 -18
  95. rslearn/models/simple_time_series.py +188 -77
  96. rslearn/models/singletask.py +24 -13
  97. rslearn/models/ssl4eo_s12.py +40 -30
  98. rslearn/models/swin.py +44 -32
  99. rslearn/models/task_embedding.py +250 -0
  100. rslearn/models/terramind.py +256 -0
  101. rslearn/models/trunk.py +139 -0
  102. rslearn/models/unet.py +68 -22
  103. rslearn/models/upsample.py +48 -0
  104. rslearn/models/use_croma.py +508 -0
  105. rslearn/template_params.py +26 -0
  106. rslearn/tile_stores/__init__.py +41 -18
  107. rslearn/tile_stores/default.py +409 -0
  108. rslearn/tile_stores/tile_store.py +236 -132
  109. rslearn/train/all_patches_dataset.py +530 -0
  110. rslearn/train/callbacks/adapters.py +53 -0
  111. rslearn/train/callbacks/freeze_unfreeze.py +348 -17
  112. rslearn/train/callbacks/gradients.py +129 -0
  113. rslearn/train/callbacks/peft.py +116 -0
  114. rslearn/train/data_module.py +444 -20
  115. rslearn/train/dataset.py +588 -235
  116. rslearn/train/lightning_module.py +192 -62
  117. rslearn/train/model_context.py +88 -0
  118. rslearn/train/optimizer.py +31 -0
  119. rslearn/train/prediction_writer.py +319 -84
  120. rslearn/train/scheduler.py +92 -0
  121. rslearn/train/tasks/classification.py +55 -28
  122. rslearn/train/tasks/detection.py +132 -76
  123. rslearn/train/tasks/embedding.py +120 -0
  124. rslearn/train/tasks/multi_task.py +28 -14
  125. rslearn/train/tasks/per_pixel_regression.py +291 -0
  126. rslearn/train/tasks/regression.py +161 -44
  127. rslearn/train/tasks/segmentation.py +428 -53
  128. rslearn/train/tasks/task.py +6 -5
  129. rslearn/train/transforms/__init__.py +1 -1
  130. rslearn/train/transforms/concatenate.py +54 -10
  131. rslearn/train/transforms/crop.py +29 -11
  132. rslearn/train/transforms/flip.py +18 -6
  133. rslearn/train/transforms/mask.py +78 -0
  134. rslearn/train/transforms/normalize.py +101 -17
  135. rslearn/train/transforms/pad.py +19 -7
  136. rslearn/train/transforms/resize.py +83 -0
  137. rslearn/train/transforms/select_bands.py +76 -0
  138. rslearn/train/transforms/sentinel1.py +75 -0
  139. rslearn/train/transforms/transform.py +89 -70
  140. rslearn/utils/__init__.py +2 -6
  141. rslearn/utils/array.py +8 -6
  142. rslearn/utils/feature.py +2 -2
  143. rslearn/utils/fsspec.py +90 -1
  144. rslearn/utils/geometry.py +347 -7
  145. rslearn/utils/get_utm_ups_crs.py +2 -3
  146. rslearn/utils/grid_index.py +5 -5
  147. rslearn/utils/jsonargparse.py +178 -0
  148. rslearn/utils/mp.py +4 -3
  149. rslearn/utils/raster_format.py +268 -116
  150. rslearn/utils/rtree_index.py +64 -17
  151. rslearn/utils/sqlite_index.py +7 -1
  152. rslearn/utils/vector_format.py +252 -97
  153. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/METADATA +532 -283
  154. rslearn-0.0.21.dist-info/RECORD +167 -0
  155. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/WHEEL +1 -1
  156. rslearn-0.0.21.dist-info/licenses/NOTICE +115 -0
  157. rslearn/data_sources/raster_source.py +0 -309
  158. rslearn/models/registry.py +0 -5
  159. rslearn/tile_stores/file.py +0 -242
  160. rslearn/utils/mgrs.py +0 -24
  161. rslearn/utils/utils.py +0 -22
  162. rslearn-0.0.1.dist-info/RECORD +0 -88
  163. rslearn/{data_sources/geotiff.py → py.typed} +0 -0
  164. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/entry_points.txt +0 -0
  165. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info/licenses}/LICENSE +0 -0
  166. {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/top_level.txt +0 -0

rslearn/train/transforms/concatenate.py

@@ -1,8 +1,21 @@
-"""Normalization transforms."""
+"""Concatenate bands across multiple image inputs."""
+
+from datetime import datetime
+from enum import Enum
+from typing import Any
 
 import torch
 
-from .transform import Transform
+from rslearn.train.model_context import RasterImage
+
+from .transform import Transform, read_selector, write_selector
+
+
+class ConcatenateDim(Enum):
+    """Enum for concatenation dimensions."""
+
+    CHANNEL = 0
+    TIME = 1
 
 
 class Concatenate(Transform):
@@ -12,6 +25,7 @@ class Concatenate(Transform):
         self,
         selections: dict[str, list[int]],
         output_selector: str,
+        concatenate_dim: ConcatenateDim | int = ConcatenateDim.TIME,
     ):
         """Initialize a new Concatenate.
 
@@ -19,12 +33,20 @@ class Concatenate(Transform):
             selections: map from selector to list of band indices in that input to
                 retain, or empty list to use all bands.
             output_selector: the output selector under which to save the concatenate image.
+            concatenate_dim: the dimension along which to concatenate the inputs.
         """
         super().__init__()
         self.selections = selections
         self.output_selector = output_selector
+        self.concatenate_dim = (
+            concatenate_dim.value
+            if isinstance(concatenate_dim, ConcatenateDim)
+            else concatenate_dim
+        )
 
-    def forward(self, input_dict, target_dict):
+    def forward(
+        self, input_dict: dict[str, Any], target_dict: dict[str, Any]
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
         """Apply concatenation over the inputs and targets.
 
         Args:
@@ -32,14 +54,36 @@ class Concatenate(Transform):
             target_dict: the target
 
         Returns:
-            normalized (input_dicts, target_dicts) tuple
+            concatenated (input_dicts, target_dicts) tuple. If one of the
+                specified inputs is a RasterImage, a RasterImage will be returned.
+                Otherwise it will be a torch.Tensor.
         """
         images = []
+        return_raster_image: bool = False
+        timestamps: list[tuple[datetime, datetime]] | None = None
         for selector, wanted_bands in self.selections.items():
-            image = self.read_selector(input_dict, target_dict, selector)
-            if wanted_bands:
-                image = image[wanted_bands, :, :]
-            images.append(image)
-        result = torch.concatenate(images, dim=0)
-        self.write_selector(input_dict, target_dict, self.output_selector, result)
+            image = read_selector(input_dict, target_dict, selector)
+            if isinstance(image, torch.Tensor):
+                if wanted_bands:
+                    image = image[wanted_bands, :, :]
+                images.append(image)
+            elif isinstance(image, RasterImage):
+                return_raster_image = True
+                if wanted_bands:
+                    images.append(image.image[wanted_bands, :, :])
+                else:
+                    images.append(image.image)
+                if timestamps is None:
+                    if image.timestamps is not None:
+                        # assume all concatenated modalities have the same
+                        # number of timestamps
+                        timestamps = image.timestamps
+        if return_raster_image:
+            result = RasterImage(
+                torch.concatenate(images, dim=self.concatenate_dim),
+                timestamps=timestamps,
+            )
+        else:
+            result = torch.concatenate(images, dim=self.concatenate_dim)
+        write_selector(input_dict, target_dict, self.output_selector, result)
         return input_dict, target_dict
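
To make the new API concrete, here is a minimal usage sketch (not part of the diff). It assumes the selector convention from rslearn.train.transforms.transform: the keys "s1" and "s2" below are hypothetical input_dict entries, and Transform's __call__ is assumed to dispatch to forward.

    import torch

    from rslearn.train.transforms.concatenate import Concatenate, ConcatenateDim

    # Two single-band inputs stacked along the channel dimension. Note the
    # default is ConcatenateDim.TIME (dim=1), aimed at RasterImage CTHW data.
    input_dict = {"s1": torch.ones(1, 32, 32), "s2": torch.zeros(1, 32, 32)}
    transform = Concatenate(
        selections={"s1": [], "s2": []},  # empty list keeps all bands
        output_selector="image",
        concatenate_dim=ConcatenateDim.CHANNEL,
    )
    input_dict, target_dict = transform(input_dict, {})
    assert input_dict["image"].shape == (2, 32, 32)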

rslearn/train/transforms/crop.py

@@ -5,7 +5,9 @@ from typing import Any
 import torch
 import torchvision
 
-from .transform import Transform
+from rslearn.train.model_context import RasterImage
+
+from .transform import Transform, read_selector
 
 
 class Crop(Transform):
@@ -69,7 +71,9 @@ class Crop(Transform):
             "remove_from_top": remove_from_top,
         }
 
-    def apply_image(self, image: torch.Tensor, state: dict[str, bool]) -> torch.Tensor:
+    def apply_image(
+        self, image: RasterImage | torch.Tensor, state: dict[str, Any]
+    ) -> RasterImage | torch.Tensor:
         """Apply the sampled state on the specified image.
 
         Args:
@@ -80,13 +84,23 @@ class Crop(Transform):
         crop_size = state["crop_size"] * image.shape[-1] // image_shape[1]
         remove_from_left = state["remove_from_left"] * image.shape[-1] // image_shape[1]
         remove_from_top = state["remove_from_top"] * image.shape[-2] // image_shape[0]
-        return torchvision.transforms.functional.crop(
-            image,
-            top=remove_from_top,
-            left=remove_from_left,
-            height=crop_size,
-            width=crop_size,
-        )
+        if isinstance(image, RasterImage):
+            image.image = torchvision.transforms.functional.crop(
+                image.image,
+                top=remove_from_top,
+                left=remove_from_left,
+                height=crop_size,
+                width=crop_size,
+            )
+        else:
+            image = torchvision.transforms.functional.crop(
+                image,
+                top=remove_from_top,
+                left=remove_from_left,
+                height=crop_size,
+                width=crop_size,
+            )
+        return image
 
     def apply_boxes(self, boxes: Any, state: dict[str, bool]) -> torch.Tensor:
         """Apply the sampled state on the specified image.
@@ -97,7 +111,9 @@ class Crop(Transform):
         """
         raise NotImplementedError
 
-    def forward(self, input_dict, target_dict):
+    def forward(
+        self, input_dict: dict[str, Any], target_dict: dict[str, Any]
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
         """Apply transform over the inputs and targets.
 
         Args:
@@ -109,13 +125,15 @@ class Crop(Transform):
         """
         smallest_image_shape = None
         for selector in self.image_selectors:
-            image = self.read_selector(input_dict, target_dict, selector)
+            image = read_selector(input_dict, target_dict, selector)
             if (
                 smallest_image_shape is None
                 or image.shape[-1] < smallest_image_shape[1]
             ):
                 smallest_image_shape = image.shape[-2:]
 
+        if smallest_image_shape is None:
+            raise ValueError("No image found to crop")
         state = self.sample_state(smallest_image_shape)
 
         self.apply_fn(
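
A hedged usage sketch follows. Crop's constructor is unchanged and therefore absent from this hunk, so the crop_size and image_selectors arguments below are assumptions about its signature, not something this diff shows.

    import torch

    from rslearn.train.transforms.crop import Crop

    input_dict = {"image": torch.rand(3, 64, 64)}
    # crop_size/image_selectors are assumed constructor arguments.
    transform = Crop(crop_size=32, image_selectors=["image"])
    input_dict, target_dict = transform(input_dict, {})
    # A randomly positioned 32x32 window has been cropped from the 64x64 input.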

rslearn/train/transforms/flip.py

@@ -1,7 +1,11 @@
 """Flip transform."""
 
+from typing import Any
+
 import torch
 
+from rslearn.train.model_context import RasterImage
+
 from .transform import Transform
 
 
@@ -46,17 +50,23 @@ class Flip(Transform):
             "vertical": vertical,
         }
 
-    def apply_image(self, image: torch.Tensor, state: dict[str, bool]) -> torch.Tensor:
+    def apply_image(self, image: RasterImage, state: dict[str, bool]) -> RasterImage:
         """Apply the sampled state on the specified image.
 
         Args:
             image: the image to transform.
             state: the sampled state.
         """
-        if state["horizontal"]:
-            image = torch.flip(image, dims=[-1])
-        if state["vertical"]:
-            image = torch.flip(image, dims=[-2])
+        if isinstance(image, RasterImage):
+            if state["horizontal"]:
+                image.image = torch.flip(image.image, dims=[-1])
+            if state["vertical"]:
+                image.image = torch.flip(image.image, dims=[-2])
+        elif isinstance(image, torch.Tensor):
+            if state["horizontal"]:
+                image = torch.flip(image, dims=[-1])
+            if state["vertical"]:
+                image = torch.flip(image, dims=[-2])
         return image
 
     def apply_boxes(
@@ -90,7 +100,9 @@ class Flip(Transform):
         )
         return boxes
 
-    def forward(self, input_dict, target_dict):
+    def forward(
+        self, input_dict: dict[str, Any], target_dict: dict[str, Any]
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
         """Apply transform over the inputs and targets.
 
         Args:
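
For orientation, a small sketch. Flip's constructor is not part of this hunk, so constructing it with no arguments below assumes its defaults (randomly flip the "image" selector horizontally and/or vertically).

    import torch

    from rslearn.train.transforms.flip import Flip

    input_dict = {"image": torch.rand(3, 16, 16)}
    transform = Flip()  # assumed defaults: random horizontal/vertical flips
    input_dict, target_dict = transform(input_dict, {})
    assert input_dict["image"].shape == (3, 16, 16)  # shape is unchanged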

rslearn/train/transforms/mask.py

@@ -0,0 +1,78 @@
+"""Mask transform."""
+
+import torch
+
+from rslearn.train.model_context import RasterImage
+from rslearn.train.transforms.transform import Transform, read_selector
+
+
+class Mask(Transform):
+    """Apply a mask to one or more images.
+
+    This uses one (mask) image input to mask another (target) image input. The value of
+    the target image is set to the mask value everywhere where the mask image is 0.
+    """
+
+    def __init__(
+        self,
+        selectors: list[str] = ["image"],
+        mask_selector: str = "mask",
+        mask_value: int = 0,
+    ):
+        """Initialize a new Mask.
+
+        Args:
+            selectors: images to mask.
+            mask_selector: the selector for the mask image to apply.
+            mask_value: set each image in selectors to this value where the image
+                corresponding to the mask_selector is 0.
+        """
+        super().__init__()
+        self.selectors = selectors
+        self.mask_selector = mask_selector
+        self.mask_value = mask_value
+
+    def apply_image(
+        self, image: torch.Tensor | RasterImage, mask: torch.Tensor | RasterImage
+    ) -> torch.Tensor | RasterImage:
+        """Apply the mask on the image.
+
+        Args:
+            image: the image
+            mask: the mask
+
+        Returns:
+            masked image
+        """
+        # Tile the mask to have same number of bands as the image.
+        if isinstance(mask, RasterImage):
+            mask = mask.image
+
+        if image.shape[0] != mask.shape[0]:
+            if mask.shape[0] != 1:
+                raise ValueError(
+                    "expected mask to either have same bands as image, or one band"
+                )
+            mask = mask.repeat(image.shape[0], 1, 1)
+
+        if isinstance(image, torch.Tensor):
+            image[mask == 0] = self.mask_value
+        else:
+            image.image[mask == 0] = self.mask_value
+        return image
+
+    def forward(self, input_dict: dict, target_dict: dict) -> tuple[dict, dict]:
+        """Apply mask.
+
+        Args:
+            input_dict: the input
+            target_dict: the target
+
+        Returns:
+            masked (input_dicts, target_dicts) tuple
+        """
+        mask = read_selector(input_dict, target_dict, self.mask_selector)
+        self.apply_fn(
+            self.apply_image, input_dict, target_dict, self.selectors, mask=mask
+        )
+        return input_dict, target_dict
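
Since mask.py is a new file, a short usage sketch may help. It assumes the "image" and "mask" selectors read straight from input_dict (the selector convention lives in transform.py, outside this diff).

    import torch

    from rslearn.train.transforms.mask import Mask

    input_dict = {
        "image": torch.ones(3, 8, 8),
        "mask": torch.zeros(1, 8, 8),  # one band, tiled across the image bands
    }
    input_dict["mask"][0, :4, :] = 1  # keep the top half
    transform = Mask(selectors=["image"], mask_selector="mask", mask_value=-1)
    input_dict, target_dict = transform(input_dict, {})
    # Every band is now -1 wherever the mask is 0 (the bottom half).
    assert (input_dict["image"][:, 4:, :] == -1).all()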

rslearn/train/transforms/normalize.py

@@ -1,7 +1,11 @@
 """Normalization transforms."""
 
+from typing import Any
+
 import torch
 
+from rslearn.train.model_context import RasterImage
+
 from .transform import Transform
 
 
@@ -12,22 +16,31 @@ class Normalize(Transform):
         self,
         mean: float | list[float],
         std: float | list[float],
-        valid_range: tuple[float, float]
-        | tuple[list[float], list[float]]
-        | None = None,
+        valid_range: (
+            tuple[float, float] | tuple[list[float], list[float]] | None
+        ) = None,
         selectors: list[str] = ["image"],
         bands: list[int] | None = None,
-    ):
+        num_bands: int | None = None,
+    ) -> None:
         """Initialize a new Normalize.
 
         Result will be (input - mean) / std.
 
         Args:
             mean: a single value or one mean per channel
-            std: a single value or one std per channel
+            std: a single value or one std per channel (must match the shape of mean)
             valid_range: optionally clip to a minimum and maximum value
             selectors: image items to transform
-            bands: optionally restrict the normalization to these bands
+            bands: optionally restrict the normalization to these band indices. If set,
+                mean and std must either be one value, or have length equal to the
+                number of band indices passed here.
+            num_bands: the number of bands per image, to distinguish different images
+                in a time series. If set, then the bands list is repeated for each
+                image, e.g. if bands=[2] then we apply normalization on images[2],
+                images[2+num_bands], images[2+num_bands*2], etc. Or if the bands list
+                is not set, then we apply the mean and std on each image in the time
+                series.
         """
         super().__init__()
         self.mean = torch.tensor(mean)
@@ -41,27 +54,98 @@ class Normalize(Transform):
         self.valid_max = None
 
         self.selectors = selectors
-        self.bands = bands
+        self.bands = torch.tensor(bands) if bands is not None else None
+        self.num_bands = num_bands
 
-    def apply_image(self, image: torch.Tensor) -> torch.Tensor:
+    def apply_image(
+        self, image: torch.Tensor | RasterImage
+    ) -> torch.Tensor | RasterImage:
         """Normalize the specified image.
 
         Args:
             image: the image to transform.
         """
-        if self.bands:
-            image[self.bands] = (image[self.bands] - self.mean) / self.std
-            if self.valid_min is not None:
-                image[self.bands] = torch.clamp(
-                    image[self.bands], min=self.valid_min, max=self.valid_max
+
+        def _repeat_mean_and_std(
+            image_channels: int, num_bands: int | None, is_raster_image: bool
+        ) -> tuple[torch.Tensor, torch.Tensor]:
+            """Get mean and std tensors that are suitable for applying on the image."""
+            # We only need to repeat the tensor if both of these are true:
+            # - The mean/std are not just one scalar.
+            # - self.num_bands is set, otherwise we treat the input as a single image.
+            if len(self.mean.shape) == 0:
+                return self.mean, self.std
+            if num_bands is None:
+                return self.mean, self.std
+            num_images = image_channels // num_bands
+            if is_raster_image:
+                # add an extra T dimension, CTHW
+                return self.mean.repeat(num_images)[
+                    :, None, None, None
+                ], self.std.repeat(num_images)[:, None, None, None]
+            else:
+                # add an extra T dimension, CTHW
+                return self.mean.repeat(num_images)[:, None, None], self.std.repeat(
+                    num_images
+                )[:, None, None]
+
+        if self.bands is not None:
+            # User has provided band indices to normalize.
+            # If num_bands is set, then we repeat these for each image in the input
+            # image time series.
+            band_indices = self.bands
+            if self.num_bands:
+                num_images = image.shape[0] // self.num_bands
+                band_indices = torch.cat(
+                    [
+                        band_indices + image_idx * self.num_bands
+                        for image_idx in range(num_images)
+                    ],
+                    dim=0,
                 )
+
+            # We use len(self.bands) here because that is how many bands per timestep
+            # we are actually processing with the mean/std.
+            mean, std = _repeat_mean_and_std(
+                image_channels=len(band_indices),
+                num_bands=len(self.bands),
+                is_raster_image=isinstance(image, RasterImage),
+            )
+            if isinstance(image, torch.Tensor):
+                image[band_indices] = (image[band_indices] - mean) / std
+                if self.valid_min is not None:
+                    image[band_indices] = torch.clamp(
+                        image[band_indices], min=self.valid_min, max=self.valid_max
+                    )
+            else:
+                image.image[band_indices] = (image.image[band_indices] - mean) / std
+                if self.valid_min is not None:
+                    image.image[band_indices] = torch.clamp(
+                        image.image[band_indices],
+                        min=self.valid_min,
+                        max=self.valid_max,
+                    )
         else:
-            image = (image - self.mean) / self.std
-            if self.valid_min is not None:
-                image = torch.clamp(image, min=self.valid_min, max=self.valid_max)
+            mean, std = _repeat_mean_and_std(
+                image_channels=image.shape[0],
+                num_bands=self.num_bands,
+                is_raster_image=isinstance(image, RasterImage),
+            )
+            if isinstance(image, torch.Tensor):
+                image = (image - mean) / std
+                if self.valid_min is not None:
+                    image = torch.clamp(image, min=self.valid_min, max=self.valid_max)
+            else:
+                image.image = (image.image - mean) / std
+                if self.valid_min is not None:
+                    image.image = torch.clamp(
+                        image.image, min=self.valid_min, max=self.valid_max
+                    )
         return image
 
-    def forward(self, input_dict, target_dict):
+    def forward(
+        self, input_dict: dict[str, Any], target_dict: dict[str, Any]
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
         """Apply normalization over the inputs and targets.
 
         Args:
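
A sketch of the new num_bands behavior on a time series flattened to (C*T, H, W); the per-band statistics below are illustrative values, not anything this diff prescribes.

    import torch

    from rslearn.train.transforms.normalize import Normalize

    # Two timesteps of a 3-band sensor, flattened to (3*2, H, W).
    input_dict = {"image": torch.rand(6, 16, 16)}
    transform = Normalize(
        mean=[0.2, 0.3, 0.4],  # illustrative per-band statistics
        std=[0.5, 0.5, 0.5],
        num_bands=3,  # repeat the stats for each image in the series
    )
    input_dict, target_dict = transform(input_dict, {})
    assert input_dict["image"].shape == (6, 16, 16)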

rslearn/train/transforms/pad.py

@@ -5,6 +5,8 @@ from typing import Any
 import torch
 import torchvision
 
+from rslearn.train.model_context import RasterImage
+
 from .transform import Transform
 
 
@@ -25,8 +27,8 @@ class Pad(Transform):
         Args:
             size: the size to pad to, or a min/max range of pad sizes. If the image is
                 larger than this size, then it is cropped instead.
-            mode: "center" (default) to apply padding equally on all sides, or
-                "topleft" to only apply it on the bottom and right.
+            mode: "topleft" (default) to only apply padding on the bottom and right
+                sides, or "center" to apply padding equally on all sides.
             image_selectors: image items to transform.
             box_selectors: boxes items to transform.
         """
@@ -48,7 +50,9 @@ class Pad(Transform):
         """
         return {"size": torch.randint(low=self.size[0], high=self.size[1], size=())}
 
-    def apply_image(self, image: torch.Tensor, state: dict[str, bool]) -> torch.Tensor:
+    def apply_image(
+        self, image: RasterImage | torch.Tensor, state: dict[str, bool]
+    ) -> RasterImage | torch.Tensor:
         """Apply the sampled state on the specified image.
 
         Args:
@@ -64,11 +68,11 @@ class Pad(Transform):
         ) -> torch.Tensor:
             # Before/after must either be both non-negative or both negative.
             # >=0 indicates padding while <0 indicates cropping.
-            assert (before < 0 and after < 0) or (before >= 0 and after >= 0)
+            assert (before < 0 and after <= 0) or (before >= 0 and after >= 0)
             if before > 0:
                 # Padding.
                 if horizontal:
-                    padding_tuple = (before, after)
+                    padding_tuple: tuple = (before, after)
                 else:
                     padding_tuple = (before, after, 0, 0)
                 return torch.nn.functional.pad(im, padding_tuple)
@@ -101,8 +105,16 @@ class Pad(Transform):
         horizontal_pad = (horizontal_half, horizontal_extra - horizontal_half)
         vertical_pad = (vertical_half, vertical_extra - vertical_half)
 
-        image = apply_padding(image, True, horizontal_pad[0], horizontal_pad[1])
-        image = apply_padding(image, False, vertical_pad[0], vertical_pad[1])
+        if isinstance(image, RasterImage):
+            image.image = apply_padding(
+                image.image, True, horizontal_pad[0], horizontal_pad[1]
+            )
+            image.image = apply_padding(
+                image.image, False, vertical_pad[0], vertical_pad[1]
+            )
+        else:
+            image = apply_padding(image, True, horizontal_pad[0], horizontal_pad[1])
+            image = apply_padding(image, False, vertical_pad[0], vertical_pad[1])
         return image
 
     def apply_boxes(self, boxes: Any, state: dict[str, bool]) -> torch.Tensor:
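
A sketch of the corrected "topleft" default; it assumes Pad's unchanged constructor accepts a fixed integer size and that image_selectors defaults to ["image"], neither of which is shown in this hunk.

    import torch

    from rslearn.train.transforms.pad import Pad

    input_dict = {"image": torch.ones(1, 20, 20)}
    transform = Pad(size=32, mode="topleft")  # pad only the bottom and right
    input_dict, target_dict = transform(input_dict, {})
    assert input_dict["image"].shape == (1, 32, 32)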

rslearn/train/transforms/resize.py

@@ -0,0 +1,83 @@
+"""Resize transform."""
+
+from typing import Any
+
+import torch
+import torchvision
+from torchvision.transforms import InterpolationMode
+
+from rslearn.train.model_context import RasterImage
+
+from .transform import Transform
+
+INTERPOLATION_MODES = {
+    "nearest": InterpolationMode.NEAREST,
+    "nearest_exact": InterpolationMode.NEAREST_EXACT,
+    "bilinear": InterpolationMode.BILINEAR,
+    "bicubic": InterpolationMode.BICUBIC,
+}
+
+
+class Resize(Transform):
+    """Resizes inputs to a target size."""
+
+    def __init__(
+        self,
+        target_size: tuple[int, int],
+        selectors: list[str] = [],
+        interpolation: str = "nearest",
+    ):
+        """Initialize a resize transform.
+
+        Args:
+            target_size: the (height, width) to resize to.
+            selectors: items to transform.
+            interpolation: the interpolation mode to use for resizing.
+                Must be one of "nearest", "nearest_exact", "bilinear", or "bicubic".
+        """
+        super().__init__()
+        self.target_size = target_size
+        self.selectors = selectors
+        self.interpolation = INTERPOLATION_MODES[interpolation]
+
+    def apply_resize(
+        self, image: torch.Tensor | RasterImage
+    ) -> torch.Tensor | RasterImage:
+        """Apply resizing on the specified image.
+
+        If the image is 2D, it is unsqueezed to 3D and then squeezed
+        back after resizing.
+
+        Args:
+            image: the image to transform.
+        """
+        if isinstance(image, torch.Tensor):
+            if image.dim() == 2:
+                image = image.unsqueeze(0)  # (H, W) -> (1, H, W)
+                result = torchvision.transforms.functional.resize(
+                    image, self.target_size, self.interpolation
+                )
+                return result.squeeze(0)  # (1, H, W) -> (H, W)
+            return torchvision.transforms.functional.resize(
+                image, self.target_size, self.interpolation
+            )
+        else:
+            image.image = torchvision.transforms.functional.resize(
+                image.image, self.target_size, self.interpolation
+            )
+            return image
+
+    def forward(
+        self, input_dict: dict[str, Any], target_dict: dict[str, Any]
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Apply transform over the inputs and targets.
+
+        Args:
+            input_dict: the input
+            target_dict: the target
+
+        Returns:
+            transformed (input_dicts, target_dicts) tuple
+        """
+        self.apply_fn(self.apply_resize, input_dict, target_dict, self.selectors)
+        return input_dict, target_dict
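
Since resize.py is a new file, a brief usage sketch; note that selectors defaults to an empty list, so the items to resize must be named explicitly. The "image" key is assumed to read from input_dict per the selector convention in transform.py.

    import torch

    from rslearn.train.transforms.resize import Resize

    input_dict = {"image": torch.rand(3, 64, 64)}
    transform = Resize(
        target_size=(32, 32),
        selectors=["image"],  # the default [] would transform nothing
        interpolation="bilinear",
    )
    input_dict, target_dict = transform(input_dict, {})
    assert input_dict["image"].shape == (3, 32, 32)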