rslearn 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. rslearn/arg_parser.py +2 -9
  2. rslearn/config/dataset.py +15 -16
  3. rslearn/dataset/dataset.py +28 -22
  4. rslearn/lightning_cli.py +22 -11
  5. rslearn/main.py +1 -1
  6. rslearn/models/anysat.py +35 -33
  7. rslearn/models/attention_pooling.py +177 -0
  8. rslearn/models/clip.py +5 -2
  9. rslearn/models/component.py +12 -0
  10. rslearn/models/croma.py +11 -3
  11. rslearn/models/dinov3.py +2 -1
  12. rslearn/models/faster_rcnn.py +2 -1
  13. rslearn/models/galileo/galileo.py +58 -31
  14. rslearn/models/module_wrapper.py +6 -1
  15. rslearn/models/molmo.py +4 -2
  16. rslearn/models/olmoearth_pretrain/model.py +206 -51
  17. rslearn/models/olmoearth_pretrain/norm.py +5 -3
  18. rslearn/models/panopticon.py +3 -1
  19. rslearn/models/presto/presto.py +45 -15
  20. rslearn/models/prithvi.py +9 -7
  21. rslearn/models/sam2_enc.py +3 -1
  22. rslearn/models/satlaspretrain.py +4 -1
  23. rslearn/models/simple_time_series.py +43 -17
  24. rslearn/models/ssl4eo_s12.py +19 -14
  25. rslearn/models/swin.py +3 -1
  26. rslearn/models/terramind.py +5 -4
  27. rslearn/train/all_patches_dataset.py +96 -28
  28. rslearn/train/dataset.py +102 -53
  29. rslearn/train/model_context.py +35 -1
  30. rslearn/train/scheduler.py +15 -0
  31. rslearn/train/tasks/classification.py +8 -2
  32. rslearn/train/tasks/detection.py +3 -2
  33. rslearn/train/tasks/multi_task.py +2 -3
  34. rslearn/train/tasks/per_pixel_regression.py +14 -5
  35. rslearn/train/tasks/regression.py +8 -2
  36. rslearn/train/tasks/segmentation.py +13 -4
  37. rslearn/train/tasks/task.py +2 -2
  38. rslearn/train/transforms/concatenate.py +45 -5
  39. rslearn/train/transforms/crop.py +22 -8
  40. rslearn/train/transforms/flip.py +13 -5
  41. rslearn/train/transforms/mask.py +11 -2
  42. rslearn/train/transforms/normalize.py +46 -15
  43. rslearn/train/transforms/pad.py +15 -3
  44. rslearn/train/transforms/resize.py +83 -0
  45. rslearn/train/transforms/select_bands.py +11 -2
  46. rslearn/train/transforms/sentinel1.py +18 -3
  47. rslearn/utils/geometry.py +73 -0
  48. rslearn/utils/jsonargparse.py +66 -0
  49. {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/METADATA +1 -1
  50. {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/RECORD +55 -53
  51. {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/WHEEL +0 -0
  52. {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/entry_points.txt +0 -0
  53. {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/licenses/LICENSE +0 -0
  54. {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/licenses/NOTICE +0 -0
  55. {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/top_level.txt +0 -0
rslearn/train/dataset.py CHANGED
@@ -8,6 +8,7 @@ import random
8
8
  import tempfile
9
9
  import time
10
10
  import uuid
11
+ from datetime import datetime
11
12
  from typing import Any
12
13
 
13
14
  import torch
@@ -19,12 +20,18 @@ from rslearn.config import (
19
20
  DType,
20
21
  LayerConfig,
21
22
  )
23
+ from rslearn.data_sources.data_source import Item
22
24
  from rslearn.dataset.dataset import Dataset
23
25
  from rslearn.dataset.storage.file import FileWindowStorage
24
- from rslearn.dataset.window import Window, get_layer_and_group_from_dir_name
26
+ from rslearn.dataset.window import (
27
+ Window,
28
+ WindowLayerData,
29
+ get_layer_and_group_from_dir_name,
30
+ )
25
31
  from rslearn.log_utils import get_logger
32
+ from rslearn.train.model_context import RasterImage
26
33
  from rslearn.utils.feature import Feature
27
- from rslearn.utils.geometry import PixelBounds
34
+ from rslearn.utils.geometry import PixelBounds, ResolutionFactor
28
35
  from rslearn.utils.mp import star_imap_unordered
29
36
 
30
37
  from .model_context import SampleMetadata
@@ -130,6 +137,10 @@ class DataInput:
130
137
  """Specification of a piece of data from a window that is needed for training.
131
138
 
132
139
  The DataInput includes which layer(s) the data can be obtained from for each window.
140
+
141
+ Note that this class is not a dataclass because jsonargparse does not play well
142
+ with dataclasses without enabling specialized options which we have not validated
143
+ will work with the rest of our code.
133
144
  """
134
145
 
135
146
  def __init__(
@@ -143,7 +154,9 @@ class DataInput:
143
154
  dtype: DType = DType.FLOAT32,
144
155
  load_all_layers: bool = False,
145
156
  load_all_item_groups: bool = False,
146
- ) -> None:
157
+ resolution_factor: ResolutionFactor = ResolutionFactor(),
158
+ resampling: Resampling = Resampling.nearest,
159
+ ):
147
160
  """Initialize a new DataInput.
148
161
 
149
162
  Args:
@@ -166,6 +179,11 @@ class DataInput:
166
179
  are reading from. By default, we assume the specified layer name is of
167
180
  the form "{layer_name}.{group_idx}" and read that item group only. With
168
181
  this option enabled, we ignore the group_idx and read all item groups.
182
+ resolution_factor: controls the resolution at which raster data is loaded for training.
183
+ By default (factor=1), data is loaded at the window resolution.
184
+ E.g. for a 64x64 window at 10 m/pixel with resolution_factor=1/2,
185
+ the resulting tensor is 32x32 (covering the same geographic area at 20 m/pixel).
186
+ resampling: resampling method (default nearest neighbor).
169
187
  """
170
188
  self.data_type = data_type
171
189
  self.layers = layers
@@ -176,6 +194,8 @@ class DataInput:
176
194
  self.dtype = dtype
177
195
  self.load_all_layers = load_all_layers
178
196
  self.load_all_item_groups = load_all_item_groups
197
+ self.resolution_factor = resolution_factor
198
+ self.resampling = resampling
179
199
 
180
200
 
181
201
  def read_raster_layer_for_data_input(
@@ -185,7 +205,8 @@ def read_raster_layer_for_data_input(
185
205
  group_idx: int,
186
206
  layer_config: LayerConfig,
187
207
  data_input: DataInput,
188
- ) -> torch.Tensor:
208
+ layer_data: WindowLayerData | None,
209
+ ) -> tuple[torch.Tensor, tuple[datetime, datetime] | None]:
189
210
  """Read a raster layer for a DataInput.
190
211
 
191
212
  This scans the available rasters for the layer at the window to determine which
@@ -198,9 +219,11 @@ def read_raster_layer_for_data_input(
198
219
  group_idx: the item group.
199
220
  layer_config: the layer configuration.
200
221
  data_input: the DataInput that specifies the bands and dtype.
222
+ layer_data: the WindowLayerData associated with this layer and window.
201
223
 
202
224
  Returns:
203
- tensor containing raster data.
225
+ tuple of the tensor containing raster data and the time range associated
226
+ with that data (or None if no time range is available).
204
227
  """
205
228
  # See what different sets of bands we need to read to get all the
206
229
  # configured bands.
@@ -233,15 +256,23 @@ def read_raster_layer_for_data_input(
233
256
  + f"window {window.name} layer {layer_name} group {group_idx}"
234
257
  )
235
258
 
259
+ # Get the projection and bounds to read under (multiply window resolution
260
+ # by the specified resolution factor).
261
+ final_projection = data_input.resolution_factor.multiply_projection(
262
+ window.projection
263
+ )
264
+ final_bounds = data_input.resolution_factor.multiply_bounds(bounds)
265
+
236
266
  image = torch.zeros(
237
- (len(needed_bands), bounds[3] - bounds[1], bounds[2] - bounds[0]),
267
+ (
268
+ len(needed_bands),
269
+ final_bounds[3] - final_bounds[1],
270
+ final_bounds[2] - final_bounds[0],
271
+ ),
238
272
  dtype=get_torch_dtype(data_input.dtype),
239
273
  )
240
274
 
241
275
  for band_set, src_indexes, dst_indexes in needed_sets_and_indexes:
242
- final_projection, final_bounds = band_set.get_final_projection_and_bounds(
243
- window.projection, bounds
244
- )
245
276
  if band_set.format is None:
246
277
  raise ValueError(f"No format specified for {layer_name}")
247
278
  raster_format = band_set.instantiate_raster_format()
@@ -249,49 +280,48 @@ def read_raster_layer_for_data_input(
249
280
  layer_name, band_set.bands, group_idx=group_idx
250
281
  )
251
282
 
252
- # Previously we always read in the native projection of the data, and then
253
- # zoom in or out (the resolution must be a power of two off) to match the
254
- # window's resolution.
255
- # However, this fails if the bounds are not multiples of the resolution factor.
256
- # So we fallback to reading directly in the window projection if that is the
257
- # case (which may be a bit slower).
258
- is_bounds_zoomable = True
259
- if band_set.zoom_offset < 0:
260
- zoom_factor = 2 ** (-band_set.zoom_offset)
261
- is_bounds_zoomable = (final_bounds[2] - final_bounds[0]) * zoom_factor == (
262
- bounds[2] - bounds[0]
263
- ) and (final_bounds[3] - final_bounds[1]) * zoom_factor == (
264
- bounds[3] - bounds[1]
265
- )
266
-
267
- if is_bounds_zoomable:
268
- src = raster_format.decode_raster(
269
- raster_dir, final_projection, final_bounds
270
- )
271
-
272
- # Resize to patch size if needed.
273
- # This is for band sets that are stored at a lower resolution.
274
- # Here we assume that it is a multiple.
275
- if src.shape[1:3] != image.shape[1:3]:
276
- if src.shape[1] < image.shape[1]:
277
- factor = image.shape[1] // src.shape[1]
278
- src = src.repeat(repeats=factor, axis=1).repeat(
279
- repeats=factor, axis=2
280
- )
281
- else:
282
- factor = src.shape[1] // image.shape[1]
283
- src = src[:, ::factor, ::factor]
284
-
285
- else:
286
- src = raster_format.decode_raster(
287
- raster_dir, window.projection, bounds, resampling=Resampling.nearest
288
- )
283
+ # TODO: previously we try to read based on band_set.zoom_offset when possible,
284
+ # and handle zooming in with torch.repeat (if resampling method is nearest
285
+ # neighbor). However, we have not benchmarked whether this actually improves
286
+ # data loading speed, so for simplicity, for now we let rasterio handle the
287
+ # resampling. If it really is much faster to handle it via torch, then it may
288
+ # make sense to bring back that functionality.
289
289
 
290
+ src = raster_format.decode_raster(
291
+ raster_dir, final_projection, final_bounds, resampling=Resampling.nearest
292
+ )
290
293
  image[dst_indexes, :, :] = torch.as_tensor(
291
294
  src[src_indexes, :, :].astype(data_input.dtype.get_numpy_dtype())
292
295
  )
293
296
 
294
- return image
297
+ # add the timestamp. this is a tuple defining the start and end of the time range.
298
+ time_range = None
299
+ if layer_data is not None:
300
+ item = Item.deserialize(layer_data.serialized_item_groups[group_idx][0])
301
+ if item.geometry.time_range is not None:
302
+ # we assume if one layer data has a geometry & time range, all of them do
303
+ time_ranges = [
304
+ (
305
+ datetime.fromisoformat(
306
+ Item.deserialize(
307
+ layer_data.serialized_item_groups[group_idx][idx]
308
+ ).geometry.time_range[0] # type: ignore
309
+ ),
310
+ datetime.fromisoformat(
311
+ Item.deserialize(
312
+ layer_data.serialized_item_groups[group_idx][idx]
313
+ ).geometry.time_range[1] # type: ignore
314
+ ),
315
+ )
316
+ for idx in range(len(layer_data.serialized_item_groups[group_idx]))
317
+ ]
318
+ # take the min and max
319
+ time_range = (
320
+ min([t[0] for t in time_ranges]),
321
+ max([t[1] for t in time_ranges]),
322
+ )
323
+
324
+ return image, time_range
295
325
 
296
326
 
297
327
  def read_data_input(
@@ -300,7 +330,7 @@ def read_data_input(
300
330
  bounds: PixelBounds,
301
331
  data_input: DataInput,
302
332
  rng: random.Random,
303
- ) -> torch.Tensor | list[Feature]:
333
+ ) -> RasterImage | list[Feature]:
304
334
  """Read the data specified by the DataInput from the window.
305
335
 
306
336
  Args:
@@ -342,15 +372,34 @@ def read_data_input(
342
372
  layers_to_read = [rng.choice(layer_options)]
343
373
 
344
374
  if data_input.data_type == "raster":
375
+ # load it once here
376
+ layer_datas = window.load_layer_datas()
345
377
  images: list[torch.Tensor] = []
378
+ time_ranges: list[tuple[datetime, datetime] | None] = []
346
379
  for layer_name, group_idx in layers_to_read:
347
380
  layer_config = dataset.layers[layer_name]
348
- images.append(
349
- read_raster_layer_for_data_input(
350
- window, bounds, layer_name, group_idx, layer_config, data_input
351
- )
381
+ image, time_range = read_raster_layer_for_data_input(
382
+ window,
383
+ bounds,
384
+ layer_name,
385
+ group_idx,
386
+ layer_config,
387
+ data_input,
388
+ # some layers (e.g. "label_raster") won't have associated
389
+ # layer datas
390
+ layer_datas[layer_name] if layer_name in layer_datas else None,
352
391
  )
353
- return torch.cat(images, dim=0)
392
+ if len(time_ranges) > 0:
393
+ if type(time_ranges[-1]) is not type(time_range):
394
+ raise ValueError(
395
+ f"All time ranges should be datetime tuples or None. Got {type(time_range)} and {type(time_ranges[-1])}"
396
+ )
397
+ images.append(image)
398
+ time_ranges.append(time_range)
399
+ return RasterImage(
400
+ torch.stack(images, dim=1),
401
+ time_ranges if time_ranges[0] is not None else None, # type: ignore
402
+ )
354
403
 
355
404
  elif data_input.data_type == "vector":
356
405
  # We don't really support time series for vector data currently, we just
@@ -10,6 +10,40 @@ import torch
10
10
  from rslearn.utils.geometry import PixelBounds, Projection
11
11
 
12
12
 
13
+ @dataclass
14
+ class RasterImage:
15
+ """A raster image pairs a torch.Tensor of images with their associated timestamps."""
16
+
17
+ # image is a 4D CTHW tensor
18
+ image: torch.Tensor
19
+ # if timestamps is not None, len(timestamps) must match the T dimension of the tensor
20
+ timestamps: list[tuple[datetime, datetime]] | None = None
21
+
22
+ @property
23
+ def shape(self) -> torch.Size:
24
+ """The shape of the image."""
25
+ return self.image.shape
26
+
27
+ def dim(self) -> int:
28
+ """The dim of the image."""
29
+ return self.image.dim()
30
+
31
+ @property
32
+ def dtype(self) -> torch.dtype:
33
+ """The image dtype."""
34
+ return self.image.dtype
35
+
36
+ def single_ts_to_chw_tensor(self) -> torch.Tensor:
37
+ """Single timestep models expect single timestep inputs.
38
+
39
+ This function (1) checks this raster image only has 1 timestep and
40
+ (2) returns the tensor for that (single) timestep (going from CTHW to CHW).
41
+ """
42
+ if self.image.shape[1] != 1:
43
+ raise ValueError(f"Expected a single timestep, got {self.image.shape[1]}")
44
+ return self.image[:, 0]
45
+
46
+
13
47
  @dataclass
14
48
  class SampleMetadata:
15
49
  """Metadata pertaining to an example."""
@@ -32,7 +66,7 @@ class ModelContext:
32
66
  """Context to pass to all model components."""
33
67
 
34
68
  # One input dict per example in the batch.
35
- inputs: list[dict[str, torch.Tensor]]
69
+ inputs: list[dict[str, torch.Tensor | RasterImage]]
36
70
  # One SampleMetadata per example in the batch.
37
71
  metadatas: list[SampleMetadata]
38
72
  # Arbitrary dict that components can add to.
@@ -8,6 +8,7 @@ from torch.optim.lr_scheduler import (
8
8
  CosineAnnealingLR,
9
9
  CosineAnnealingWarmRestarts,
10
10
  LRScheduler,
11
+ MultiStepLR,
11
12
  ReduceLROnPlateau,
12
13
  )
13
14
 
@@ -50,6 +51,20 @@ class PlateauScheduler(SchedulerFactory):
50
51
  return ReduceLROnPlateau(optimizer, **self.get_kwargs())
51
52
 
52
53
 
54
+ @dataclass
55
+ class MultiStepScheduler(SchedulerFactory):
56
+ """Step learning rate scheduler."""
57
+
58
+ milestones: list[int]
59
+ gamma: float | None = None
60
+ last_epoch: int | None = None
61
+
62
+ def build(self, optimizer: Optimizer) -> LRScheduler:
63
+ """Build the MultiStepLR scheduler."""
64
+ super().build(optimizer)
65
+ return MultiStepLR(optimizer, **self.get_kwargs())
66
+
67
+
53
68
  @dataclass
54
69
  class CosineAnnealingScheduler(SchedulerFactory):
55
70
  """Cosine annealing learning rate scheduler."""
@@ -16,7 +16,12 @@ from torchmetrics.classification import (
16
16
  )
17
17
 
18
18
  from rslearn.models.component import FeatureVector, Predictor
19
- from rslearn.train.model_context import ModelContext, ModelOutput, SampleMetadata
19
+ from rslearn.train.model_context import (
20
+ ModelContext,
21
+ ModelOutput,
22
+ RasterImage,
23
+ SampleMetadata,
24
+ )
20
25
  from rslearn.utils import Feature, STGeometry
21
26
 
22
27
  from .task import BasicTask
@@ -99,7 +104,7 @@ class ClassificationTask(BasicTask):
99
104
 
100
105
  def process_inputs(
101
106
  self,
102
- raw_inputs: dict[str, torch.Tensor | list[Feature]],
107
+ raw_inputs: dict[str, RasterImage | list[Feature]],
103
108
  metadata: SampleMetadata,
104
109
  load_targets: bool = True,
105
110
  ) -> tuple[dict[str, Any], dict[str, Any]]:
@@ -118,6 +123,7 @@ class ClassificationTask(BasicTask):
118
123
  return {}, {}
119
124
 
120
125
  data = raw_inputs["targets"]
126
+ assert isinstance(data, list)
121
127
  for feat in data:
122
128
  if feat.properties is None:
123
129
  continue
@@ -12,7 +12,7 @@ import torchmetrics.classification
12
12
  import torchvision
13
13
  from torchmetrics import Metric, MetricCollection
14
14
 
15
- from rslearn.train.model_context import SampleMetadata
15
+ from rslearn.train.model_context import RasterImage, SampleMetadata
16
16
  from rslearn.utils import Feature, STGeometry
17
17
 
18
18
  from .task import BasicTask
@@ -127,7 +127,7 @@ class DetectionTask(BasicTask):
127
127
 
128
128
  def process_inputs(
129
129
  self,
130
- raw_inputs: dict[str, torch.Tensor | list[Feature]],
130
+ raw_inputs: dict[str, RasterImage | list[Feature]],
131
131
  metadata: SampleMetadata,
132
132
  load_targets: bool = True,
133
133
  ) -> tuple[dict[str, Any], dict[str, Any]]:
@@ -152,6 +152,7 @@ class DetectionTask(BasicTask):
152
152
  valid = 1
153
153
 
154
154
  data = raw_inputs["targets"]
155
+ assert isinstance(data, list)
155
156
  for feat in data:
156
157
  if feat.properties is None:
157
158
  continue
@@ -3,10 +3,9 @@
3
3
  from typing import Any
4
4
 
5
5
  import numpy.typing as npt
6
- import torch
7
6
  from torchmetrics import Metric, MetricCollection
8
7
 
9
- from rslearn.train.model_context import SampleMetadata
8
+ from rslearn.train.model_context import RasterImage, SampleMetadata
10
9
  from rslearn.utils import Feature
11
10
 
12
11
  from .task import Task
@@ -30,7 +29,7 @@ class MultiTask(Task):
30
29
 
31
30
  def process_inputs(
32
31
  self,
33
- raw_inputs: dict[str, torch.Tensor | list[Feature]],
32
+ raw_inputs: dict[str, RasterImage | list[Feature]],
34
33
  metadata: SampleMetadata,
35
34
  load_targets: bool = True,
36
35
  ) -> tuple[dict[str, Any], dict[str, Any]]:
@@ -9,7 +9,12 @@ import torchmetrics
9
9
  from torchmetrics import Metric, MetricCollection
10
10
 
11
11
  from rslearn.models.component import FeatureMaps, Predictor
12
- from rslearn.train.model_context import ModelContext, ModelOutput, SampleMetadata
12
+ from rslearn.train.model_context import (
13
+ ModelContext,
14
+ ModelOutput,
15
+ RasterImage,
16
+ SampleMetadata,
17
+ )
13
18
  from rslearn.utils.feature import Feature
14
19
 
15
20
  from .task import BasicTask
@@ -42,7 +47,7 @@ class PerPixelRegressionTask(BasicTask):
42
47
 
43
48
  def process_inputs(
44
49
  self,
45
- raw_inputs: dict[str, torch.Tensor],
50
+ raw_inputs: dict[str, RasterImage | list[Feature]],
46
51
  metadata: SampleMetadata,
47
52
  load_targets: bool = True,
48
53
  ) -> tuple[dict[str, Any], dict[str, Any]]:
@@ -60,11 +65,15 @@ class PerPixelRegressionTask(BasicTask):
60
65
  if not load_targets:
61
66
  return {}, {}
62
67
 
63
- assert raw_inputs["targets"].shape[0] == 1
64
- labels = raw_inputs["targets"][0, :, :].float() * self.scale_factor
68
+ assert isinstance(raw_inputs["targets"], RasterImage)
69
+ assert raw_inputs["targets"].image.shape[0] == 1
70
+ assert raw_inputs["targets"].image.shape[1] == 1
71
+ labels = raw_inputs["targets"].image[0, 0, :, :].float() * self.scale_factor
65
72
 
66
73
  if self.nodata_value is not None:
67
- valid = (raw_inputs["targets"][0, :, :] != self.nodata_value).float()
74
+ valid = (
75
+ raw_inputs["targets"].image[0, 0, :, :] != self.nodata_value
76
+ ).float()
68
77
  else:
69
78
  valid = torch.ones(labels.shape, dtype=torch.float32)
70
79
 
@@ -11,7 +11,12 @@ from PIL import Image, ImageDraw
11
11
  from torchmetrics import Metric, MetricCollection
12
12
 
13
13
  from rslearn.models.component import FeatureVector, Predictor
14
- from rslearn.train.model_context import ModelContext, ModelOutput, SampleMetadata
14
+ from rslearn.train.model_context import (
15
+ ModelContext,
16
+ ModelOutput,
17
+ RasterImage,
18
+ SampleMetadata,
19
+ )
15
20
  from rslearn.utils.feature import Feature
16
21
  from rslearn.utils.geometry import STGeometry
17
22
 
@@ -63,7 +68,7 @@ class RegressionTask(BasicTask):
63
68
 
64
69
  def process_inputs(
65
70
  self,
66
- raw_inputs: dict[str, torch.Tensor | list[Feature]],
71
+ raw_inputs: dict[str, RasterImage | list[Feature]],
67
72
  metadata: SampleMetadata,
68
73
  load_targets: bool = True,
69
74
  ) -> tuple[dict[str, Any], dict[str, Any]]:
@@ -82,6 +87,7 @@ class RegressionTask(BasicTask):
82
87
  return {}, {}
83
88
 
84
89
  data = raw_inputs["targets"]
90
+ assert isinstance(data, list)
85
91
  for feat in data:
86
92
  if feat.properties is None or self.filters is None:
87
93
  continue
@@ -1,5 +1,6 @@
1
1
  """Segmentation task."""
2
2
 
3
+ from collections.abc import Mapping
3
4
  from typing import Any
4
5
 
5
6
  import numpy as np
@@ -9,7 +10,13 @@ import torchmetrics.classification
9
10
  from torchmetrics import Metric, MetricCollection
10
11
 
11
12
  from rslearn.models.component import FeatureMaps, Predictor
12
- from rslearn.train.model_context import ModelContext, ModelOutput, SampleMetadata
13
+ from rslearn.train.model_context import (
14
+ ModelContext,
15
+ ModelOutput,
16
+ RasterImage,
17
+ SampleMetadata,
18
+ )
19
+ from rslearn.utils import Feature
13
20
 
14
21
  from .task import BasicTask
15
22
 
@@ -108,7 +115,7 @@ class SegmentationTask(BasicTask):
108
115
 
109
116
  def process_inputs(
110
117
  self,
111
- raw_inputs: dict[str, torch.Tensor],
118
+ raw_inputs: Mapping[str, RasterImage | list[Feature]],
112
119
  metadata: SampleMetadata,
113
120
  load_targets: bool = True,
114
121
  ) -> tuple[dict[str, Any], dict[str, Any]]:
@@ -126,8 +133,10 @@ class SegmentationTask(BasicTask):
126
133
  if not load_targets:
127
134
  return {}, {}
128
135
 
129
- assert raw_inputs["targets"].shape[0] == 1
130
- labels = raw_inputs["targets"][0, :, :].long()
136
+ assert isinstance(raw_inputs["targets"], RasterImage)
137
+ assert raw_inputs["targets"].image.shape[0] == 1
138
+ assert raw_inputs["targets"].image.shape[1] == 1
139
+ labels = raw_inputs["targets"].image[0, 0, :, :].long()
131
140
 
132
141
  if self.class_id_mapping is not None:
133
142
  new_labels = labels.clone()
@@ -7,7 +7,7 @@ import numpy.typing as npt
7
7
  import torch
8
8
  from torchmetrics import MetricCollection
9
9
 
10
- from rslearn.train.model_context import SampleMetadata
10
+ from rslearn.train.model_context import RasterImage, SampleMetadata
11
11
  from rslearn.utils import Feature
12
12
 
13
13
 
@@ -21,7 +21,7 @@ class Task:
21
21
 
22
22
  def process_inputs(
23
23
  self,
24
- raw_inputs: dict[str, torch.Tensor | list[Feature]],
24
+ raw_inputs: dict[str, RasterImage | list[Feature]],
25
25
  metadata: SampleMetadata,
26
26
  load_targets: bool = True,
27
27
  ) -> tuple[dict[str, Any], dict[str, Any]]:
@@ -1,12 +1,23 @@
1
1
  """Concatenate bands across multiple image inputs."""
2
2
 
3
+ from datetime import datetime
4
+ from enum import Enum
3
5
  from typing import Any
4
6
 
5
7
  import torch
6
8
 
9
+ from rslearn.train.model_context import RasterImage
10
+
7
11
  from .transform import Transform, read_selector, write_selector
8
12
 
9
13
 
14
+ class ConcatenateDim(Enum):
15
+ """Enum for concatenation dimensions."""
16
+
17
+ CHANNEL = 0
18
+ TIME = 1
19
+
20
+
10
21
  class Concatenate(Transform):
11
22
  """Concatenate bands across multiple image inputs."""
12
23
 
@@ -14,6 +25,7 @@ class Concatenate(Transform):
14
25
  self,
15
26
  selections: dict[str, list[int]],
16
27
  output_selector: str,
28
+ concatenate_dim: ConcatenateDim | int = ConcatenateDim.TIME,
17
29
  ):
18
30
  """Initialize a new Concatenate.
19
31
 
@@ -21,10 +33,16 @@ class Concatenate(Transform):
21
33
  selections: map from selector to list of band indices in that input to
22
34
  retain, or empty list to use all bands.
23
35
  output_selector: the output selector under which to save the concatenate image.
36
+ concatenate_dim: the dimension against which to concatenate the inputs
24
37
  """
25
38
  super().__init__()
26
39
  self.selections = selections
27
40
  self.output_selector = output_selector
41
+ self.concatenate_dim = (
42
+ concatenate_dim.value
43
+ if isinstance(concatenate_dim, ConcatenateDim)
44
+ else concatenate_dim
45
+ )
28
46
 
29
47
  def forward(
30
48
  self, input_dict: dict[str, Any], target_dict: dict[str, Any]
@@ -36,14 +54,36 @@ class Concatenate(Transform):
36
54
  target_dict: the target
37
55
 
38
56
  Returns:
39
- normalized (input_dicts, target_dicts) tuple
57
+ concatenated (input_dicts, target_dicts) tuple. If one of the
58
+ specified inputs is a RasterImage, a RasterImage will be returned.
59
+ Otherwise it will be a torch.Tensor.
40
60
  """
41
61
  images = []
62
+ return_raster_image: bool = False
63
+ timestamps: list[tuple[datetime, datetime]] | None = None
42
64
  for selector, wanted_bands in self.selections.items():
43
65
  image = read_selector(input_dict, target_dict, selector)
44
- if wanted_bands:
45
- image = image[wanted_bands, :, :]
46
- images.append(image)
47
- result = torch.concatenate(images, dim=0)
66
+ if isinstance(image, torch.Tensor):
67
+ if wanted_bands:
68
+ image = image[wanted_bands, :, :]
69
+ images.append(image)
70
+ elif isinstance(image, RasterImage):
71
+ return_raster_image = True
72
+ if wanted_bands:
73
+ images.append(image.image[wanted_bands, :, :])
74
+ else:
75
+ images.append(image.image)
76
+ if timestamps is None:
77
+ if image.timestamps is not None:
78
+ # assume all concatenated modalities have the same
79
+ # number of timestamps
80
+ timestamps = image.timestamps
81
+ if return_raster_image:
82
+ result = RasterImage(
83
+ torch.concatenate(images, dim=self.concatenate_dim),
84
+ timestamps=timestamps,
85
+ )
86
+ else:
87
+ result = torch.concatenate(images, dim=self.concatenate_dim)
48
88
  write_selector(input_dict, target_dict, self.output_selector, result)
49
89
  return input_dict, target_dict