rslearn 0.0.1__py3-none-any.whl → 0.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rslearn/arg_parser.py +31 -0
- rslearn/config/__init__.py +6 -12
- rslearn/config/dataset.py +520 -401
- rslearn/const.py +9 -15
- rslearn/data_sources/__init__.py +8 -23
- rslearn/data_sources/aws_landsat.py +242 -98
- rslearn/data_sources/aws_open_data.py +111 -151
- rslearn/data_sources/aws_sentinel1.py +131 -0
- rslearn/data_sources/climate_data_store.py +471 -0
- rslearn/data_sources/copernicus.py +884 -12
- rslearn/data_sources/data_source.py +43 -12
- rslearn/data_sources/earthdaily.py +484 -0
- rslearn/data_sources/earthdata_srtm.py +282 -0
- rslearn/data_sources/eurocrops.py +242 -0
- rslearn/data_sources/gcp_public_data.py +578 -222
- rslearn/data_sources/google_earth_engine.py +461 -135
- rslearn/data_sources/local_files.py +219 -150
- rslearn/data_sources/openstreetmap.py +51 -89
- rslearn/data_sources/planet.py +24 -60
- rslearn/data_sources/planet_basemap.py +275 -0
- rslearn/data_sources/planetary_computer.py +798 -0
- rslearn/data_sources/usda_cdl.py +195 -0
- rslearn/data_sources/usgs_landsat.py +115 -83
- rslearn/data_sources/utils.py +249 -61
- rslearn/data_sources/vector_source.py +1 -0
- rslearn/data_sources/worldcereal.py +449 -0
- rslearn/data_sources/worldcover.py +144 -0
- rslearn/data_sources/worldpop.py +153 -0
- rslearn/data_sources/xyz_tiles.py +150 -107
- rslearn/dataset/__init__.py +8 -2
- rslearn/dataset/add_windows.py +2 -2
- rslearn/dataset/dataset.py +40 -51
- rslearn/dataset/handler_summaries.py +131 -0
- rslearn/dataset/manage.py +313 -74
- rslearn/dataset/materialize.py +431 -107
- rslearn/dataset/remap.py +29 -4
- rslearn/dataset/storage/__init__.py +1 -0
- rslearn/dataset/storage/file.py +202 -0
- rslearn/dataset/storage/storage.py +140 -0
- rslearn/dataset/window.py +181 -44
- rslearn/lightning_cli.py +454 -0
- rslearn/log_utils.py +24 -0
- rslearn/main.py +384 -181
- rslearn/models/anysat.py +215 -0
- rslearn/models/attention_pooling.py +177 -0
- rslearn/models/clay/clay.py +231 -0
- rslearn/models/clay/configs/metadata.yaml +295 -0
- rslearn/models/clip.py +68 -0
- rslearn/models/component.py +111 -0
- rslearn/models/concatenate_features.py +103 -0
- rslearn/models/conv.py +63 -0
- rslearn/models/croma.py +306 -0
- rslearn/models/detr/__init__.py +5 -0
- rslearn/models/detr/box_ops.py +103 -0
- rslearn/models/detr/detr.py +504 -0
- rslearn/models/detr/matcher.py +107 -0
- rslearn/models/detr/position_encoding.py +114 -0
- rslearn/models/detr/transformer.py +429 -0
- rslearn/models/detr/util.py +24 -0
- rslearn/models/dinov3.py +177 -0
- rslearn/models/faster_rcnn.py +30 -28
- rslearn/models/feature_center_crop.py +53 -0
- rslearn/models/fpn.py +19 -8
- rslearn/models/galileo/__init__.py +5 -0
- rslearn/models/galileo/galileo.py +595 -0
- rslearn/models/galileo/single_file_galileo.py +1678 -0
- rslearn/models/module_wrapper.py +65 -0
- rslearn/models/molmo.py +69 -0
- rslearn/models/multitask.py +384 -28
- rslearn/models/olmoearth_pretrain/__init__.py +1 -0
- rslearn/models/olmoearth_pretrain/model.py +421 -0
- rslearn/models/olmoearth_pretrain/norm.py +86 -0
- rslearn/models/panopticon.py +170 -0
- rslearn/models/panopticon_data/sensors/drone.yaml +32 -0
- rslearn/models/panopticon_data/sensors/enmap.yaml +904 -0
- rslearn/models/panopticon_data/sensors/goes.yaml +9 -0
- rslearn/models/panopticon_data/sensors/himawari.yaml +9 -0
- rslearn/models/panopticon_data/sensors/intuition.yaml +606 -0
- rslearn/models/panopticon_data/sensors/landsat8.yaml +84 -0
- rslearn/models/panopticon_data/sensors/modis_terra.yaml +99 -0
- rslearn/models/panopticon_data/sensors/qb2_ge1.yaml +34 -0
- rslearn/models/panopticon_data/sensors/sentinel1.yaml +85 -0
- rslearn/models/panopticon_data/sensors/sentinel2.yaml +97 -0
- rslearn/models/panopticon_data/sensors/superdove.yaml +60 -0
- rslearn/models/panopticon_data/sensors/wv23.yaml +63 -0
- rslearn/models/pick_features.py +17 -10
- rslearn/models/pooling_decoder.py +60 -7
- rslearn/models/presto/__init__.py +5 -0
- rslearn/models/presto/presto.py +297 -0
- rslearn/models/presto/single_file_presto.py +926 -0
- rslearn/models/prithvi.py +1147 -0
- rslearn/models/resize_features.py +59 -0
- rslearn/models/sam2_enc.py +13 -9
- rslearn/models/satlaspretrain.py +38 -18
- rslearn/models/simple_time_series.py +188 -77
- rslearn/models/singletask.py +24 -13
- rslearn/models/ssl4eo_s12.py +40 -30
- rslearn/models/swin.py +44 -32
- rslearn/models/task_embedding.py +250 -0
- rslearn/models/terramind.py +256 -0
- rslearn/models/trunk.py +139 -0
- rslearn/models/unet.py +68 -22
- rslearn/models/upsample.py +48 -0
- rslearn/models/use_croma.py +508 -0
- rslearn/template_params.py +26 -0
- rslearn/tile_stores/__init__.py +41 -18
- rslearn/tile_stores/default.py +409 -0
- rslearn/tile_stores/tile_store.py +236 -132
- rslearn/train/all_patches_dataset.py +530 -0
- rslearn/train/callbacks/adapters.py +53 -0
- rslearn/train/callbacks/freeze_unfreeze.py +348 -17
- rslearn/train/callbacks/gradients.py +129 -0
- rslearn/train/callbacks/peft.py +116 -0
- rslearn/train/data_module.py +444 -20
- rslearn/train/dataset.py +588 -235
- rslearn/train/lightning_module.py +192 -62
- rslearn/train/model_context.py +88 -0
- rslearn/train/optimizer.py +31 -0
- rslearn/train/prediction_writer.py +319 -84
- rslearn/train/scheduler.py +92 -0
- rslearn/train/tasks/classification.py +55 -28
- rslearn/train/tasks/detection.py +132 -76
- rslearn/train/tasks/embedding.py +120 -0
- rslearn/train/tasks/multi_task.py +28 -14
- rslearn/train/tasks/per_pixel_regression.py +291 -0
- rslearn/train/tasks/regression.py +161 -44
- rslearn/train/tasks/segmentation.py +428 -53
- rslearn/train/tasks/task.py +6 -5
- rslearn/train/transforms/__init__.py +1 -1
- rslearn/train/transforms/concatenate.py +54 -10
- rslearn/train/transforms/crop.py +29 -11
- rslearn/train/transforms/flip.py +18 -6
- rslearn/train/transforms/mask.py +78 -0
- rslearn/train/transforms/normalize.py +101 -17
- rslearn/train/transforms/pad.py +19 -7
- rslearn/train/transforms/resize.py +83 -0
- rslearn/train/transforms/select_bands.py +76 -0
- rslearn/train/transforms/sentinel1.py +75 -0
- rslearn/train/transforms/transform.py +89 -70
- rslearn/utils/__init__.py +2 -6
- rslearn/utils/array.py +8 -6
- rslearn/utils/feature.py +2 -2
- rslearn/utils/fsspec.py +90 -1
- rslearn/utils/geometry.py +347 -7
- rslearn/utils/get_utm_ups_crs.py +2 -3
- rslearn/utils/grid_index.py +5 -5
- rslearn/utils/jsonargparse.py +178 -0
- rslearn/utils/mp.py +4 -3
- rslearn/utils/raster_format.py +268 -116
- rslearn/utils/rtree_index.py +64 -17
- rslearn/utils/sqlite_index.py +7 -1
- rslearn/utils/vector_format.py +252 -97
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/METADATA +532 -283
- rslearn-0.0.21.dist-info/RECORD +167 -0
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/WHEEL +1 -1
- rslearn-0.0.21.dist-info/licenses/NOTICE +115 -0
- rslearn/data_sources/raster_source.py +0 -309
- rslearn/models/registry.py +0 -5
- rslearn/tile_stores/file.py +0 -242
- rslearn/utils/mgrs.py +0 -24
- rslearn/utils/utils.py +0 -22
- rslearn-0.0.1.dist-info/RECORD +0 -88
- /rslearn/{data_sources/geotiff.py → py.typed} +0 -0
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/entry_points.txt +0 -0
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info/licenses}/LICENSE +0 -0
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/top_level.txt +0 -0
rslearn/models/resize_features.py
ADDED

@@ -0,0 +1,59 @@
+"""The ResizeFeatures module."""
+
+from typing import Any
+
+import torch
+
+from rslearn.train.model_context import ModelContext
+
+from .component import (
+    FeatureMaps,
+    IntermediateComponent,
+)
+
+
+class ResizeFeatures(IntermediateComponent):
+    """Resize input features to new sizes."""
+
+    def __init__(
+        self,
+        out_sizes: list[tuple[int, int]],
+        mode: str = "bilinear",
+    ):
+        """Initialize a ResizeFeatures.
+
+        Args:
+            out_sizes: the output sizes of the feature maps. There must be one entry
+                for each input feature map.
+            mode: mode to pass to torch.nn.Upsample, e.g. "bilinear" (default) or
+                "nearest".
+        """
+        super().__init__()
+        layers = []
+        for size in out_sizes:
+            layers.append(
+                torch.nn.Upsample(
+                    size=size,
+                    mode=mode,
+                )
+            )
+        self.layers = torch.nn.ModuleList(layers)
+
+    def forward(self, intermediates: Any, context: ModelContext) -> FeatureMaps:
+        """Resize the input feature maps to new sizes.
+
+        Args:
+            intermediates: the outputs from the previous component, which must be a FeatureMaps.
+            context: the model context.
+
+        Returns:
+            resized feature maps
+        """
+        if not isinstance(intermediates, FeatureMaps):
+            raise ValueError("input to ResizeFeatures must be a FeatureMaps")
+
+        feat_maps = intermediates.feature_maps
+        resized_feat_maps = [
+            self.layers[idx](feat_map) for idx, feat_map in enumerate(feat_maps)
+        ]
+        return FeatureMaps(resized_feat_maps)
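For reference, the resizing that the new ResizeFeatures component performs is one torch.nn.Upsample per input feature map. A minimal torch-only sketch of that behavior (the shapes and sizes below are hypothetical; the rslearn component additionally takes and returns FeatureMaps):

    import torch

    # Hypothetical backbone outputs: two feature maps at different resolutions.
    feat_maps = [torch.randn(2, 256, 32, 32), torch.randn(2, 512, 16, 16)]

    # out_sizes needs one (height, width) entry per feature map; ResizeFeatures
    # builds one torch.nn.Upsample per entry and applies it positionally.
    out_sizes = [(64, 64), (64, 64)]
    layers = [torch.nn.Upsample(size=size, mode="bilinear") for size in out_sizes]
    resized = [layer(f) for layer, f in zip(layers, feat_maps)]
    print([tuple(r.shape) for r in resized])
    # [(2, 256, 64, 64), (2, 512, 64, 64)]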
rslearn/models/sam2_enc.py
CHANGED

@@ -1,14 +1,15 @@
 """SegmentAnything2 encoders."""

-from typing import Any
-
 import torch
-import torch.nn as nn
 from sam2.build_sam import build_sam2
 from upath import UPath

+from rslearn.train.model_context import ModelContext
+
+from .component import FeatureExtractor, FeatureMaps
+

-class SAM2Encoder(nn.Module):
+class SAM2Encoder(FeatureExtractor):
     """SAM2's image encoder."""

     def __init__(self, model_identifier: str) -> None:
@@ -84,18 +85,21 @@ class SAM2Encoder(nn.Module):
         del self.model.obj_ptr_proj
         del self.model.image_encoder.neck

-    def forward(self,
+    def forward(self, context: ModelContext) -> FeatureMaps:
         """Extract multi-scale features from a batch of images.

         Args:
-
+            context: the model context. Input dicts must have a key 'image' containing
+                the input for the SAM2 image encoder.

         Returns:
-
+            feature maps from the encoder.
         """
-        images = torch.stack(
+        images = torch.stack(
+            [inp["image"].single_ts_to_chw_tensor() for inp in context.inputs], dim=0
+        )
         features = self.encoder(images)
-        return features
+        return FeatureMaps(features)

     def get_backbone_channels(self) -> list[list[int]]:
         """Returns the output channels of the encoder at different scales.
rslearn/models/satlaspretrain.py
CHANGED

@@ -1,19 +1,20 @@
 """SatlasPretrain models."""

-from typing import Any
-
 import satlaspretrain_models
 import torch
+import torch.nn.functional as F
+
+from rslearn.train.model_context import ModelContext

+from .component import FeatureExtractor, FeatureMaps

-class SatlasPretrain(torch.nn.Module):
+
+class SatlasPretrain(FeatureExtractor):
     """SatlasPretrain backbones."""

     def __init__(
-        self,
-
-        fpn: bool = False,
-    ):
+        self, model_identifier: str, fpn: bool = False, resize_to_pretrain: bool = False
+    ) -> None:
         """Instantiate a new SatlasPretrain instance.

         Args:
@@ -21,11 +22,13 @@ class SatlasPretrain(torch.nn.Module):
             https://github.com/allenai/satlaspretrain_models
         fpn: whether to include the feature pyramid network, otherwise only the
             Swin-v2-Transformer is used.
+        resize_to_pretrain: whether to resize inputs to the pretraining input
+            size (512 x 512)
         """
         super().__init__()
         weights_manager = satlaspretrain_models.Weights()
         self.model = weights_manager.get_pretrained_model(
-            model_identifier=model_identifier, fpn=fpn
+            model_identifier=model_identifier, fpn=fpn, device="cpu"
         )

         if "SwinB" in model_identifier:
@@ -49,21 +52,38 @@ class SatlasPretrain(torch.nn.Module):
            [16, 1024],
            [32, 2048],
        ]
+        self.resize_to_pretrain = resize_to_pretrain
+
+    def maybe_resize(self, data: torch.Tensor) -> list[torch.Tensor]:
+        """Resize to pretraining sizes if resize_to_pretrain == True."""
+        if self.resize_to_pretrain:
+            return F.interpolate(
+                data,
+                size=(512, 512),
+                mode="bilinear",
+                align_corners=False,
+            )
+        else:
+            return data

-    def forward(
-        self, inputs: list[dict[str, Any]], targets: list[dict[str, Any]] = None
-    ):
+    def forward(self, context: ModelContext) -> FeatureMaps:
         """Compute feature maps from the SatlasPretrain backbone.

-
-
-
-
+        Args:
+            context: the model context. Input dicts must contain an "image" key
+                containing the image input to the model.
+
+        Returns:
+            multi-resolution feature maps computed by the model.
         """
-
-
+        # take the first (assumed to be only) timestep
+        images = torch.stack(
+            [inp["image"].single_ts_to_chw_tensor() for inp in context.inputs], dim=0
+        )
+        feature_maps = self.model(self.maybe_resize(images))
+        return FeatureMaps(feature_maps)

-    def get_backbone_channels(self):
+    def get_backbone_channels(self) -> list:
         """Returns the output channels of this model when used as a backbone.

         The output channels is a list of (downsample_factor, depth) that corresponds
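The new resize_to_pretrain option amounts to a single F.interpolate call before the backbone runs. A small sketch of that behavior with a hypothetical input (the band count and input size here are made up):

    import torch
    import torch.nn.functional as F

    # Hypothetical input: batch of 1, 9 bands, 128x128 pixels.
    x = torch.randn(1, 9, 128, 128)

    # maybe_resize() upsamples to the 512x512 pretraining size when enabled.
    resized = F.interpolate(x, size=(512, 512), mode="bilinear", align_corners=False)
    print(tuple(resized.shape))  # (1, 9, 512, 512)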
rslearn/models/simple_time_series.py
CHANGED

@@ -3,12 +3,17 @@
 from typing import Any

 import torch
+from einops import rearrange

+from rslearn.train.model_context import ModelContext, RasterImage

-class SimpleTimeSeries(torch.nn.Module):
-    """SimpleTimeSeries wraps another encoder and applies it on an image time series.
+from .component import FeatureExtractor, FeatureMaps

-
+
+class SimpleTimeSeries(FeatureExtractor):
+    """SimpleTimeSeries wraps another FeatureExtractor and applies it on an image time series.
+
+    It independently applies the other FeatureExtractor on each image in the time series to
     extract feature maps. It then provides a few ways to combine the features into one
     final feature map:
     - Temporal max pooling.
@@ -19,17 +24,21 @@ class SimpleTimeSeries(torch.nn.Module):

     def __init__(
         self,
-        encoder:
-        image_channels: int,
+        encoder: FeatureExtractor,
+        image_channels: int | None = None,
         op: str = "max",
         groups: list[list[int]] | None = None,
         num_layers: int | None = None,
-
+        image_key: str = "image",
+        backbone_channels: list[tuple[int, int]] | None = None,
+        image_keys: dict[str, int] | None = None,
+    ) -> None:
         """Create a new SimpleTimeSeries.

         Args:
-            encoder: the underlying
-                function that returns the output channels.
+            encoder: the underlying FeatureExtractor. It must provide get_backbone_channels
+                function that returns the output channels, or backbone_channels must be set.
+                It must output a FeatureMaps.
             image_channels: the number of channels per image of the time series. The
                 input should have multiple images concatenated on the channel axis, so
                 this parameter is used to distinguish the different images.
@@ -42,76 +51,101 @@ class SimpleTimeSeries(torch.nn.Module):
                 combined before features and the combined after features. groups is a
                 list of sets, and each set is a list of image indices.
             num_layers: the number of layers for convrnn, conv3d, and conv1d ops.
+            image_key: the key to access the images.
+            backbone_channels: manually specify the backbone channels. Can be set if
+                the encoder does not provide get_backbone_channels function.
+            image_keys: as an alternative to setting image_channels, map from the key
+                in input dict to the number of channels per timestep for that modality.
+                This way SimpleTimeSeries can be used with multimodal inputs. One of
+                image_channels or image_keys must be specified.
         """
+        if (image_channels is None and image_keys is None) or (
+            image_channels is not None and image_keys is not None
+        ):
+            raise ValueError(
+                "exactly one of image_channels and image_keys must be specified"
+            )
+
         super().__init__()
         self.encoder = encoder
         self.image_channels = image_channels
         self.op = op
         self.groups = groups
+        self.image_key = image_key
+        self.image_keys = image_keys

-
+        if backbone_channels is not None:
+            out_channels = backbone_channels
+        else:
+            out_channels = self.encoder.get_backbone_channels()
         if self.groups:
             self.num_groups = len(self.groups)
         else:
             self.num_groups = 1

-        if self.op
-
-
-
-
-
-
-
-
-                    torch.nn.ReLU(inplace=True),
-                )
-            ]
-            for _ in range(num_layers - 1):
-                cur_layer.append(
+        if self.op in ["convrnn", "conv3d", "conv1d"]:
+            if num_layers is None:
+                raise ValueError(f"num_layers must be specified for {self.op} op")
+
+            if self.op == "convrnn":
+                rnn_kernel_size = 3
+                rnn_layers = []
+                for _, count in out_channels:
+                    cur_layer = [
                         torch.nn.Sequential(
                             torch.nn.Conv2d(
-                                count, count, rnn_kernel_size, padding="same"
+                                2 * count, count, rnn_kernel_size, padding="same"
                             ),
                             torch.nn.ReLU(inplace=True),
                         )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            )
-
-
-
-
-
+                    ]
+                    for _ in range(num_layers - 1):
+                        cur_layer.append(
+                            torch.nn.Sequential(
+                                torch.nn.Conv2d(
+                                    count, count, rnn_kernel_size, padding="same"
+                                ),
+                                torch.nn.ReLU(inplace=True),
+                            )
+                        )
+                    cur_layer = torch.nn.Sequential(*cur_layer)
+                    rnn_layers.append(cur_layer)
+                self.rnn_layers = torch.nn.ModuleList(rnn_layers)
+
+            elif self.op == "conv3d":
+                conv3d_layers = []
+                for _, count in out_channels:
+                    cur_layer = [
+                        torch.nn.Sequential(
+                            torch.nn.Conv3d(
+                                count, count, 3, padding=1, stride=(2, 1, 1)
+                            ),
+                            torch.nn.ReLU(inplace=True),
+                        )
+                        for _ in range(num_layers)
+                    ]
+                    cur_layer = torch.nn.Sequential(*cur_layer)
+                    conv3d_layers.append(cur_layer)
+                self.conv3d_layers = torch.nn.ModuleList(conv3d_layers)
+
+            elif self.op == "conv1d":
+                conv1d_layers = []
+                for _, count in out_channels:
+                    cur_layer = [
+                        torch.nn.Sequential(
+                            torch.nn.Conv1d(count, count, 3, padding=1, stride=2),
+                            torch.nn.ReLU(inplace=True),
+                        )
+                        for _ in range(num_layers)
+                    ]
+                    cur_layer = torch.nn.Sequential(*cur_layer)
+                    conv1d_layers.append(cur_layer)
+                self.conv1d_layers = torch.nn.ModuleList(conv1d_layers)

         else:
             assert self.op in ["max", "mean"]

-    def get_backbone_channels(self):
+    def get_backbone_channels(self) -> list:
         """Returns the output channels of this model when used as a backbone.

         The output channels is a list of (downsample_factor, depth) that corresponds
@@ -128,27 +162,105 @@ class SimpleTimeSeries(torch.nn.Module):
             out_channels.append((downsample_factor, depth * self.num_groups))
         return out_channels

+    def _get_batched_images(
+        self, input_dicts: list[dict[str, Any]], image_key: str, image_channels: int
+    ) -> list[RasterImage]:
+        """Collect and reshape images across input dicts.
+
+        The BTCHW image time series are reshaped to (B*T)CHW so they can be passed to
+        the forward pass of a per-image (unitemporal) model.
+        """
+        images = torch.stack(
+            [input_dict[image_key].image for input_dict in input_dicts], dim=0
+        )  # B, C, T, H, W
+        timestamps = [input_dict[image_key].timestamps for input_dict in input_dicts]
+        # if image channels is not equal to the actual number of channels, then
+        # then every N images should be batched together. For example, if the
+        # number of input channels c == 2, and image_channels == 4, then we
+        # want to pass 2 timesteps to the model.
+        # TODO is probably to make this behaviour clearer but lets leave it like
+        # this for now to not break things.
+        num_timesteps = images.shape[1] // image_channels
+        batched_timesteps = images.shape[2] // num_timesteps
+        images = rearrange(
+            images,
+            "b c (b_t k_t) h w -> (b b_t) c k_t h w",
+            b_t=batched_timesteps,
+            k_t=num_timesteps,
+        )
+        if timestamps[0] is None:
+            new_timestamps = [None] * images.shape[0]
+        else:
+            # we also need to split the timestamps
+            new_timestamps = []
+            for t in timestamps:
+                for i in range(batched_timesteps):
+                    new_timestamps.append(
+                        t[i * num_timesteps : (i + 1) * num_timesteps]
+                    )
+        return [
+            RasterImage(image=image, timestamps=timestamps)
+            for image, timestamps in zip(images, new_timestamps)
+        ]  # C, T, H, W
+
     def forward(
-        self,
-
+        self,
+        context: ModelContext,
+    ) -> FeatureMaps:
         """Compute outputs from the backbone.

-
-
+        Args:
+            context: the model context. Input dicts must include "image" key containing the image time
                 series to process (with images concatenated on the channel dimension).
-
+
+        Returns:
+            the FeatureMaps aggregated temporally.
         """
         # First get features of each image.
         # To do so, we need to split up each grouped image into its component images (which have had their channels stacked).
-
-        n_batch =
-        n_images
-
-
-
-
+        batched_inputs: list[dict[str, Any]] | None = None
+        n_batch = len(context.inputs)
+        n_images: int | None = None
+
+        if self.image_keys is not None:
+            for image_key, image_channels in self.image_keys.items():
+                batched_images = self._get_batched_images(
+                    context.inputs, image_key, image_channels
+                )
+
+                if batched_inputs is None:
+                    batched_inputs = [{} for _ in batched_images]
+                    n_images = len(batched_images) // n_batch
+                elif n_images != len(batched_images) // n_batch:
+                    raise ValueError(
+                        "expected all modalities to have the same number of timesteps"
+                    )
+
+                for i, image in enumerate(batched_images):
+                    batched_inputs[i][image_key] = image
+
+        else:
+            assert self.image_channels is not None
+            batched_images = self._get_batched_images(
+                context.inputs, self.image_key, self.image_channels
+            )
+            batched_inputs = [{self.image_key: image} for image in batched_images]
+            n_images = len(batched_images) // n_batch
+
+        assert n_images is not None
+        # Now we can apply the underlying FeatureExtractor.
+        # Its output must be a FeatureMaps.
+        assert batched_inputs is not None
+        encoder_output = self.encoder(
+            ModelContext(
+                inputs=batched_inputs,
+                metadatas=context.metadatas,
+            )
         )
-
+        if not isinstance(encoder_output, FeatureMaps):
+            raise ValueError(
+                "output of underlying FeatureExtractor in SimpleTimeSeries must be a FeatureMaps"
+            )
         all_features = [
             feat_map.reshape(
                 n_batch,
@@ -157,9 +269,8 @@ class SimpleTimeSeries(torch.nn.Module):
                 feat_map.shape[2],
                 feat_map.shape[3],
             )
-            for feat_map in
+            for feat_map in encoder_output.feature_maps
         ]
-
         # Groups defaults to flattening all the feature maps.
         groups = self.groups
         if not groups:
@@ -171,13 +282,13 @@ class SimpleTimeSeries(torch.nn.Module):
         for feature_idx in range(len(all_features)):
             aggregated_features = []
             for group in groups:
-
+                group_features_list = []
                 for image_idx in group:
-
+                    group_features_list.append(
                         all_features[feature_idx][:, image_idx, :, :, :]
                     )
                 # Resulting group features are (depth, batch, C, height, width).
-                group_features = torch.stack(
+                group_features = torch.stack(group_features_list, dim=0)

                 if self.op == "max":
                     group_features = torch.amax(group_features, dim=0)
@@ -213,7 +324,7 @@ class SimpleTimeSeries(torch.nn.Module):
                     .permute(0, 3, 1, 2)
                 )
             else:
-                raise
+                raise ValueError(f"unknown aggregation op {self.op}")

                 aggregated_features.append(group_features)

@@ -222,4 +333,4 @@ class SimpleTimeSeries(torch.nn.Module):

         output_features.append(aggregated_features)

-        return output_features
+        return FeatureMaps(output_features)
rslearn/models/singletask.py
CHANGED

@@ -4,6 +4,10 @@ from typing import Any

 import torch

+from rslearn.train.model_context import ModelContext, ModelOutput
+
+from .component import FeatureExtractor, IntermediateComponent, Predictor
+

 class SingleTaskModel(torch.nn.Module):
     """Standard model wrapper.
@@ -14,34 +18,41 @@ class SingleTaskModel(torch.nn.Module):
     outputs and targets from the last module (which also receives the targets).
     """

-    def __init__(
+    def __init__(
+        self,
+        encoder: list[FeatureExtractor | IntermediateComponent],
+        decoder: list[IntermediateComponent | Predictor],
+    ):
         """Initialize a new SingleTaskModel.

         Args:
-            encoder: modules to compute intermediate feature representations.
-
+            encoder: modules to compute intermediate feature representations. The first
+                module must be a FeatureExtractor, and following modules must be
+                IntermediateComponents.
+            decoder: modules to compute outputs and loss. The last module must be a
+                Predictor, while the previous modules must be IntermediateComponents.
         """
         super().__init__()
-        self.encoder = torch.nn.
+        self.encoder = torch.nn.ModuleList(encoder)
         self.decoder = torch.nn.ModuleList(decoder)

     def forward(
         self,
-
+        context: ModelContext,
         targets: list[dict[str, Any]] | None = None,
-    ) ->
+    ) -> ModelOutput:
         """Apply the sequence of modules on the inputs.

         Args:
-
+            context: the model context.
             targets: optional list of target dicts

         Returns:
-
+            the model output.
         """
-
-
+        cur = self.encoder[0](context)
+        for module in self.encoder[1:]:
+            cur = module(cur, context)
         for module in self.decoder[:-1]:
-            cur = module(cur,
-
-        return self.decoder[-1](cur, inputs, targets)
+            cur = module(cur, context)
+        return self.decoder[-1](cur, context, targets)