rslearn 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rslearn/arg_parser.py +2 -9
- rslearn/config/__init__.py +2 -0
- rslearn/config/dataset.py +64 -20
- rslearn/dataset/add_windows.py +1 -1
- rslearn/dataset/dataset.py +34 -84
- rslearn/dataset/materialize.py +5 -5
- rslearn/dataset/storage/__init__.py +1 -0
- rslearn/dataset/storage/file.py +202 -0
- rslearn/dataset/storage/storage.py +140 -0
- rslearn/dataset/window.py +26 -80
- rslearn/lightning_cli.py +22 -11
- rslearn/main.py +12 -37
- rslearn/models/anysat.py +11 -9
- rslearn/models/attention_pooling.py +177 -0
- rslearn/models/clay/clay.py +8 -9
- rslearn/models/clip.py +18 -15
- rslearn/models/component.py +111 -0
- rslearn/models/concatenate_features.py +21 -11
- rslearn/models/conv.py +15 -8
- rslearn/models/croma.py +13 -8
- rslearn/models/detr/detr.py +25 -14
- rslearn/models/dinov3.py +11 -6
- rslearn/models/faster_rcnn.py +19 -9
- rslearn/models/feature_center_crop.py +12 -9
- rslearn/models/fpn.py +19 -8
- rslearn/models/galileo/galileo.py +23 -18
- rslearn/models/module_wrapper.py +26 -57
- rslearn/models/molmo.py +16 -14
- rslearn/models/multitask.py +102 -73
- rslearn/models/olmoearth_pretrain/model.py +135 -38
- rslearn/models/panopticon.py +8 -7
- rslearn/models/pick_features.py +18 -24
- rslearn/models/pooling_decoder.py +22 -14
- rslearn/models/presto/presto.py +16 -10
- rslearn/models/presto/single_file_presto.py +4 -10
- rslearn/models/prithvi.py +12 -8
- rslearn/models/resize_features.py +21 -7
- rslearn/models/sam2_enc.py +11 -9
- rslearn/models/satlaspretrain.py +15 -9
- rslearn/models/simple_time_series.py +37 -17
- rslearn/models/singletask.py +24 -17
- rslearn/models/ssl4eo_s12.py +15 -10
- rslearn/models/swin.py +22 -13
- rslearn/models/terramind.py +24 -7
- rslearn/models/trunk.py +6 -3
- rslearn/models/unet.py +18 -9
- rslearn/models/upsample.py +22 -9
- rslearn/train/all_patches_dataset.py +89 -37
- rslearn/train/dataset.py +105 -97
- rslearn/train/lightning_module.py +51 -32
- rslearn/train/model_context.py +54 -0
- rslearn/train/prediction_writer.py +111 -41
- rslearn/train/scheduler.py +15 -0
- rslearn/train/tasks/classification.py +34 -15
- rslearn/train/tasks/detection.py +24 -31
- rslearn/train/tasks/embedding.py +33 -29
- rslearn/train/tasks/multi_task.py +7 -7
- rslearn/train/tasks/per_pixel_regression.py +41 -19
- rslearn/train/tasks/regression.py +38 -21
- rslearn/train/tasks/segmentation.py +33 -15
- rslearn/train/tasks/task.py +3 -2
- rslearn/train/transforms/resize.py +74 -0
- rslearn/utils/geometry.py +73 -0
- rslearn/utils/jsonargparse.py +66 -0
- {rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/METADATA +1 -1
- {rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/RECORD +71 -66
- rslearn/dataset/index.py +0 -173
- rslearn/models/registry.py +0 -22
- {rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/WHEEL +0 -0
- {rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/entry_points.txt +0 -0
- {rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/licenses/LICENSE +0 -0
- {rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/licenses/NOTICE +0 -0
- {rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/top_level.txt +0 -0
rslearn/models/pick_features.py
CHANGED
@@ -2,45 +2,39 @@
 
 from typing import Any
 
-import torch
+from rslearn.train.model_context import ModelContext
 
+from .component import (
+    FeatureMaps,
+    IntermediateComponent,
+)
 
-class PickFeatures(torch.nn.Module):
+
+class PickFeatures(IntermediateComponent):
     """Picks a subset of feature maps in a multi-scale feature map list."""
 
-    def __init__(self, indexes: list[int]
+    def __init__(self, indexes: list[int]):
         """Create a new PickFeatures.
 
         Args:
            indexes: the indexes of the input feature map list to select.
-            collapse: return one feature map instead of list. If enabled, indexes must
-                consist of one index. This is mainly useful for using PickFeatures as
-                the final module in the decoder, since the final prediction is expected
-                to be one feature map for most tasks like segmentation.
         """
         super().__init__()
         self.indexes = indexes
-        self.collapse = collapse
-
-        if self.collapse and len(self.indexes) != 1:
-            raise ValueError("if collapse is enabled, must get exactly one index")
 
     def forward(
         self,
-        features: list[torch.Tensor],
-        inputs: list[dict[str, Any]],
-        targets: list[dict[str, Any]] | None = None,
-    ) -> list[torch.Tensor]:
+        intermediates: Any,
+        context: ModelContext,
+    ) -> FeatureMaps:
         """Pick a subset of the features.
 
         Args:
-            targets: targets, not used
+            intermediates: the output from the previous component, which must be a FeatureMaps.
+            context: the model context.
         """
-        return new_features
+        if not isinstance(intermediates, FeatureMaps):
+            raise ValueError("input to PickFeatures must be FeatureMaps")
+
+        new_features = [intermediates.feature_maps[idx] for idx in self.indexes]
+        return FeatureMaps(new_features)
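The PickFeatures change above illustrates the component interface used throughout 0.0.19: an IntermediateComponent receives the previous component's output plus a ModelContext instead of raw tensor lists and input dicts. Below is a minimal sketch of calling the new module directly; constructing ModelContext with inputs=/metadatas= keyword arguments is an assumption based on the usage visible in the simple_time_series.py diff further down, and the empty metadata list is purely illustrative.

```python
import torch

from rslearn.models.component import FeatureMaps
from rslearn.models.pick_features import PickFeatures
from rslearn.train.model_context import ModelContext

# Three fake multi-scale feature maps for a batch of two samples.
features = FeatureMaps([torch.randn(2, 64, size, size) for size in (32, 16, 8)])
# PickFeatures ignores the context, so a minimal one is enough here; the
# keyword arguments are assumed from usages elsewhere in this diff.
context = ModelContext(inputs=[{}, {}], metadatas=[])

picker = PickFeatures(indexes=[0, 2])
picked = picker(features, context)
print(len(picked.feature_maps))  # 2: the first and last scales were kept
```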
rslearn/models/pooling_decoder.py
CHANGED

@@ -4,8 +4,16 @@ from typing import Any
 
 import torch
 
+from rslearn.train.model_context import ModelContext
 
-class PoolingDecoder(torch.nn.Module):
+from .component import (
+    FeatureMaps,
+    FeatureVector,
+    IntermediateComponent,
+)
+
+
+class PoolingDecoder(IntermediateComponent):
     """Decoder that computes flat vector from a 2D feature map.
 
     It inputs multi-scale features, but only uses the last feature map. Then applies a
@@ -57,25 +65,26 @@ class PoolingDecoder(torch.nn.Module):
 
         self.output_layer = torch.nn.Linear(prev_channels, out_channels)
 
-    def forward(
-        self, features: list[torch.Tensor], inputs: list[dict[str, Any]]
-    ) -> torch.Tensor:
+    def forward(self, intermediates: Any, context: ModelContext) -> Any:
         """Compute flat output vector from multi-scale feature map.
 
         Args:
+            intermediates: the output from the previous component, which must be a FeatureMaps.
+            context: the model context.
 
         Returns:
             flat feature vector
         """
+        if not isinstance(intermediates, FeatureMaps):
+            raise ValueError("input to PoolingDecoder must be a FeatureMaps")
+
         # Only use last feature map.
-        features = features[-1]
+        features = intermediates.feature_maps[-1]
 
         features = self.conv_layers(features)
         features = torch.amax(features, dim=(2, 3))
         features = self.fc_layers(features)
-        return self.output_layer(features)
+        return FeatureVector(self.output_layer(features))
 
 
 class SegmentationPoolingDecoder(PoolingDecoder):
@@ -108,14 +117,13 @@ class SegmentationPoolingDecoder(PoolingDecoder):
         super().__init__(in_channels=in_channels, out_channels=out_channels, **kwargs)
         self.image_key = image_key
 
-    def forward(
-        self, features: list[torch.Tensor], inputs: list[dict[str, Any]]
-    ) -> torch.Tensor:
+    def forward(self, intermediates: Any, context: ModelContext) -> Any:
         """Extend PoolingDecoder forward to upsample the output to a segmentation mask.
 
         This only works when all of the pixels have the same segmentation target.
         """
-        output_probs = super().forward(
+        output_probs = super().forward(intermediates, context)
         # BC -> BCHW
-        h, w = inputs[0][self.image_key].shape[1:3]
+        h, w = context.inputs[0][self.image_key].shape[1:3]
+        feat_map = output_probs.feature_vector[:, :, None, None].repeat([1, 1, h, w])
+        return FeatureMaps([feat_map])
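SegmentationPoolingDecoder keeps its old broadcast trick, only now the image size comes from context.inputs and the result is wrapped in a FeatureMaps. The BC -> BCHW expansion on its own looks like this (shapes invented for illustration):

```python
import torch

batch, num_classes, height, width = 2, 5, 64, 64
# One pooled prediction vector per sample, i.e. shape (B, C).
output_probs = torch.randn(batch, num_classes)

# Insert singleton spatial dims, then repeat them out to the image size so
# every pixel carries the same per-sample logits.
feat_map = output_probs[:, :, None, None].repeat([1, 1, height, width])
assert feat_map.shape == (batch, num_classes, height, width)
```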
rslearn/models/presto/presto.py
CHANGED
@@ -2,14 +2,13 @@
 
 import logging
 import tempfile
-from typing import Any
 
 import torch
 from einops import rearrange, repeat
 from huggingface_hub import hf_hub_download
-from torch import nn
 from upath import UPath
 
+from rslearn.models.component import FeatureExtractor, FeatureMaps
 from rslearn.models.presto.single_file_presto import (
     ERA5_BANDS,
     NUM_DYNAMIC_WORLD_CLASSES,
@@ -21,6 +20,7 @@ from rslearn.models.presto.single_file_presto import (
     SRTM_BANDS,
 )
 from rslearn.models.presto.single_file_presto import Presto as SFPresto
+from rslearn.train.model_context import ModelContext
 
 logger = logging.getLogger(__name__)
 
@@ -36,7 +36,7 @@ HF_HUB_ID = "nasaharvest/presto"
 MODEL_FILENAME = "default_model.pt"
 
 
-class Presto(nn.Module):
+class Presto(FeatureExtractor):
     """Presto."""
 
     input_keys = [
@@ -184,22 +184,26 @@ class Presto(nn.Module):
         x = (x + PRESTO_ADD_BY.to(device=device)) / PRESTO_DIV_BY.to(device=device)
         return x, mask, dynamic_world.long(), months.long()
 
-    def forward(self,
+    def forward(self, context: ModelContext) -> FeatureMaps:
         """Compute feature maps from the Presto backbone.
 
+        Args:
+            context: the model context. Input dicts should have some subset of Presto.input_keys.
+
+        Returns:
+            a FeatureMaps with one feature map that is at the same resolution as the
+            input (since Presto operates per-pixel).
         """
         stacked_inputs = {}
         latlons: torch.Tensor | None = None
-        for key in inputs[0].keys():
+        for key in context.inputs[0].keys():
             # assume all the keys in an input are consistent
             if key in self.input_keys:
                 if key == "latlon":
-                    latlons = torch.stack([inp[key] for inp in inputs], dim=0)
+                    latlons = torch.stack([inp[key] for inp in context.inputs], dim=0)
                 else:
                     stacked_inputs[key] = torch.stack(
-                        [inp[key] for inp in inputs], dim=0
+                        [inp[key] for inp in context.inputs], dim=0
                     )
 
         (
@@ -247,7 +251,9 @@ class Presto(nn.Module):
             )
             output_features[batch_idx : batch_idx + self.pixel_batch_size] = output_b
 
-        return
+        return FeatureMaps(
+            [rearrange(output_features, "(b h w) d -> b d h w", h=h, w=w, b=b)]
+        )
 
     def get_backbone_channels(self) -> list:
         """Returns the output channels of this model when used as a backbone.
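Presto embeds each pixel independently, so the new forward flattens the B*H*W pixels into one batch and then folds the embeddings back into a single feature map before wrapping them in FeatureMaps. The einops pattern used in the return statement behaves like this (sizes invented for illustration):

```python
import torch
from einops import rearrange

b, h, w, d = 2, 4, 4, 128
# One embedding per pixel, with all pixels flattened into the batch axis.
per_pixel = torch.randn(b * h * w, d)

# "(b h w) d -> b d h w": unflatten the pixel axis and move the embedding
# dimension into the channel position of a standard feature map.
feat_map = rearrange(per_pixel, "(b h w) d -> b d h w", b=b, h=h, w=w)
assert feat_map.shape == (b, d, h, w)
```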
rslearn/models/presto/single_file_presto.py
CHANGED

@@ -281,10 +281,7 @@ def get_sinusoid_encoding_table(
     sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2])  # dim 2i
     sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2])  # dim 2i+1
 
-    if torch.cuda.is_available():
-        return torch.FloatTensor(sinusoid_table).cuda()
-    else:
-        return torch.FloatTensor(sinusoid_table)
+    return torch.FloatTensor(sinusoid_table)
 
 
 def get_month_encoding_table(d_hid: int) -> torch.Tensor:
@@ -296,10 +293,7 @@ def get_month_encoding_table(d_hid: int) -> torch.Tensor:
     cos_table = np.cos(np.stack([angles for _ in range(d_hid // 2)], axis=-1))
     month_table = np.concatenate([sin_table[:-1], cos_table[:-1]], axis=-1)
 
-    if torch.cuda.is_available():
-        return torch.FloatTensor(month_table).cuda()
-    else:
-        return torch.FloatTensor(month_table)
+    return torch.FloatTensor(month_table)
 
 
 def month_to_tensor(
@@ -405,7 +399,7 @@ class Encoder(nn.Module):
         """initialize_weights."""
         pos_embed = get_sinusoid_encoding_table(
             self.pos_embed.shape[1], self.pos_embed.shape[-1]
-        )
+        ).to(device=self.pos_embed.device)
         self.pos_embed.data.copy_(pos_embed)
 
         # initialize nn.Linear and nn.LayerNorm
@@ -640,7 +634,7 @@ class Decoder(nn.Module):
         """initialize_weights."""
         pos_embed = get_sinusoid_encoding_table(
             self.pos_embed.shape[1], self.pos_embed.shape[-1]
-        )
+        ).to(device=self.pos_embed.device)
         self.pos_embed.data.copy_(pos_embed)
 
         # initialize nn.Linear and nn.LayerNorm
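The single_file_presto.py change drops the unconditional .cuda() calls from the encoding-table helpers; instead the caller moves the table onto whatever device the positional-embedding parameter already lives on. The general pattern, sketched independently of Presto (the helper name below is a stand-in):

```python
import torch

# The parameter may live on CPU, CUDA, MPS, etc., depending on the module.
pos_embed = torch.nn.Parameter(torch.zeros(16, 128))

def build_table(num_positions: int, dim: int) -> torch.Tensor:
    # Stand-in for get_sinusoid_encoding_table: build on CPU and let the
    # caller decide where the tensor should live.
    return torch.randn(num_positions, dim)

# Follow the parameter's device instead of assuming CUDA is available.
table = build_table(16, 128).to(device=pos_embed.device)
pos_embed.data.copy_(table)
```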
rslearn/models/prithvi.py
CHANGED
@@ -25,9 +25,12 @@ from timm.layers import to_2tuple
 from timm.models.vision_transformer import Block
 from torch.nn import functional as F
 
+from rslearn.train.model_context import ModelContext
 from rslearn.train.transforms.normalize import Normalize
 from rslearn.train.transforms.transform import Transform
 
+from .component import FeatureExtractor, FeatureMaps
+
 logger = logging.getLogger(__name__)
 
 
@@ -77,7 +80,7 @@ def get_config(cache_dir: Path, hf_hub_id: str, hf_hub_revision: str) -> dict[st
         return json.load(f)["pretrained_cfg"]
 
 
-class PrithviV2(nn.Module):
+class PrithviV2(FeatureExtractor):
     """An Rslearn wrapper for Prithvi 2.0."""
 
     INPUT_KEY = "image"
@@ -157,18 +160,18 @@ class PrithviV2(nn.Module):
         )
         return data
 
-    def forward(self,
+    def forward(self, context: ModelContext) -> FeatureMaps:
         """Compute feature maps from the Prithvi V2 backbone.
 
         Args:
-                (Harmonized Landsat-Sentinel) data.
+            context: the model context. Input dicts must include "image" key containing
+                HLS (Harmonized Landsat-Sentinel) data.
 
         Returns:
+            a FeatureMaps with one map of shape [B, H/p_s, W/p_s, 11*1024] that contains stacked
+            feature maps across the 11 transformer blocks.
         """
-        x = torch.stack([inp[self.INPUT_KEY] for inp in inputs], dim=0)
+        x = torch.stack([inp[self.INPUT_KEY] for inp in context.inputs], dim=0)
         x = self._resize_data(x)
         num_timesteps = x.shape[1] // len(self.bands)
         x = rearrange(x, "b (t c) h w -> b c t h w", t=num_timesteps)
@@ -177,9 +180,10 @@
         # know the number of timesteps and don't need to recompute it.
         # in addition we average along the time dimension (instead of concatenating)
         # to keep the embeddings reasonably sized.
-        return self.model.encoder.prepare_features_for_image_model(
+        result = self.model.encoder.prepare_features_for_image_model(
            features, num_timesteps
         )
+        return FeatureMaps([torch.cat(result, dim=1)])
 
     def get_backbone_channels(self) -> list:
         """Returns the output channels of this model when used as a backbone.
rslearn/models/resize_features.py
CHANGED

@@ -1,9 +1,18 @@
 """The ResizeFeatures module."""
 
+from typing import Any
+
 import torch
 
+from rslearn.train.model_context import ModelContext
+
+from .component import (
+    FeatureMaps,
+    IntermediateComponent,
+)
+
 
-class ResizeFeatures(
+class ResizeFeatures(IntermediateComponent):
     """Resize input features to new sizes."""
 
     def __init__(
@@ -30,16 +39,21 @@ class ResizeFeatures(torch.nn.Module):
         )
         self.layers = torch.nn.ModuleList(layers)
 
-    def forward(
-        self, features: list[torch.Tensor], inputs: list[torch.Tensor]
-    ) -> list[torch.Tensor]:
+    def forward(self, intermediates: Any, context: ModelContext) -> FeatureMaps:
         """Resize the input feature maps to new sizes.
 
         Args:
+            intermediates: the outputs from the previous component, which must be a FeatureMaps.
+            context: the model context.
 
         Returns:
             resized feature maps
         """
+        if not isinstance(intermediates, FeatureMaps):
+            raise ValueError("input to ResizeFeatures must be a FeatureMaps")
+
+        feat_maps = intermediates.feature_maps
+        resized_feat_maps = [
+            self.layers[idx](feat_map) for idx, feat_map in enumerate(feat_maps)
+        ]
+        return FeatureMaps(resized_feat_maps)
rslearn/models/sam2_enc.py
CHANGED
@@ -1,14 +1,15 @@
 """SegmentAnything2 encoders."""
 
-from typing import Any
-
 import torch
-import torch.nn as nn
 from sam2.build_sam import build_sam2
 from upath import UPath
 
+from rslearn.train.model_context import ModelContext
+
+from .component import FeatureExtractor, FeatureMaps
+
 
-class SAM2Encoder(
+class SAM2Encoder(FeatureExtractor):
     """SAM2's image encoder."""
 
     def __init__(self, model_identifier: str) -> None:
@@ -84,18 +85,19 @@ class SAM2Encoder(nn.Module):
         del self.model.obj_ptr_proj
         del self.model.image_encoder.neck
 
-    def forward(self,
+    def forward(self, context: ModelContext) -> FeatureMaps:
         """Extract multi-scale features from a batch of images.
 
         Args:
+            context: the model context. Input dicts must have a key 'image' containing
+                the input for the SAM2 image encoder.
 
         Returns:
+            feature maps from the encoder.
         """
-        images = torch.stack([inp["image"] for inp in inputs], dim=0)
+        images = torch.stack([inp["image"] for inp in context.inputs], dim=0)
         features = self.encoder(images)
-        return features
+        return FeatureMaps(features)
 
     def get_backbone_channels(self) -> list[list[int]]:
         """Returns the output channels of the encoder at different scales.
rslearn/models/satlaspretrain.py
CHANGED
@@ -1,13 +1,15 @@
 """SatlasPretrain models."""
 
-from typing import Any
-
 import satlaspretrain_models
 import torch
 import torch.nn.functional as F
 
+from rslearn.train.model_context import ModelContext
+
+from .component import FeatureExtractor, FeatureMaps
+
 
-class SatlasPretrain(
+class SatlasPretrain(FeatureExtractor):
     """SatlasPretrain backbones."""
 
     def __init__(
@@ -64,15 +66,19 @@ class SatlasPretrain(torch.nn.Module):
         else:
             return data
 
-    def forward(self,
+    def forward(self, context: ModelContext) -> FeatureMaps:
         """Compute feature maps from the SatlasPretrain backbone.
 
+        Args:
+            context: the model context. Input dicts must contain an "image" key
+                containing the image input to the model.
+
+        Returns:
+            multi-resolution feature maps computed by the model.
         """
-        images = torch.stack([inp["image"] for inp in inputs], dim=0)
-        return self.model(self.maybe_resize(images))
+        images = torch.stack([inp["image"] for inp in context.inputs], dim=0)
+        feature_maps = self.model(self.maybe_resize(images))
+        return FeatureMaps(feature_maps)
 
     def get_backbone_channels(self) -> list:
         """Returns the output channels of this model when used as a backbone.
rslearn/models/simple_time_series.py
CHANGED

@@ -4,11 +4,15 @@ from typing import Any
 
 import torch
 
+from rslearn.train.model_context import ModelContext
 
-class SimpleTimeSeries(torch.nn.Module):
-    """SimpleTimeSeries wraps another encoder and applies it on an image time series.
+from .component import FeatureExtractor, FeatureMaps
 
+
+class SimpleTimeSeries(FeatureExtractor):
+    """SimpleTimeSeries wraps another FeatureExtractor and applies it on an image time series.
+
+    It independently applies the other FeatureExtractor on each image in the time series to
     extract feature maps. It then provides a few ways to combine the features into one
     final feature map:
     - Temporal max pooling.
@@ -19,7 +23,7 @@ class SimpleTimeSeries(torch.nn.Module):
 
     def __init__(
         self,
-        encoder:
+        encoder: FeatureExtractor,
         image_channels: int | None = None,
         op: str = "max",
         groups: list[list[int]] | None = None,
@@ -31,9 +35,9 @@
         """Create a new SimpleTimeSeries.
 
         Args:
-            encoder: the underlying
-                function that returns the output channels, or backbone_channels must be
+            encoder: the underlying FeatureExtractor. It must provide get_backbone_channels
+                function that returns the output channels, or backbone_channels must be set.
+                It must output a FeatureMaps.
             image_channels: the number of channels per image of the time series. The
                 input should have multiple images concatenated on the channel axis, so
                 this parameter is used to distinguish the different images.
@@ -179,24 +183,27 @@
 
     def forward(
         self,
-        inputs: list[dict[str, Any]],
-    ) ->
+        context: ModelContext,
+    ) -> FeatureMaps:
         """Compute outputs from the backbone.
 
+        Args:
+            context: the model context. Input dicts must include "image" key containing the image time
                 series to process (with images concatenated on the channel dimension).
+
+        Returns:
+            the FeatureMaps aggregated temporally.
         """
         # First get features of each image.
         # To do so, we need to split up each grouped image into its component images (which have had their channels stacked).
         batched_inputs: list[dict[str, Any]] | None = None
-        n_batch = len(inputs)
+        n_batch = len(context.inputs)
         n_images: int | None = None
 
         if self.image_keys is not None:
             for image_key, image_channels in self.image_keys.items():
                 batched_images = self._get_batched_images(
-                    inputs, image_key, image_channels
+                    context.inputs, image_key, image_channels
                 )
 
                 if batched_inputs is None:
@@ -213,13 +220,26 @@
         else:
             assert self.image_channels is not None
             batched_images = self._get_batched_images(
-                inputs, self.image_key, self.image_channels
+                context.inputs, self.image_key, self.image_channels
             )
             batched_inputs = [{self.image_key: image} for image in batched_images]
             n_images = batched_images.shape[0] // n_batch
 
         assert n_images is not None
 
+        # Now we can apply the underlying FeatureExtractor.
+        # Its output must be a FeatureMaps.
+        assert batched_inputs is not None
+        encoder_output = self.encoder(
+            ModelContext(
+                inputs=batched_inputs,
+                metadatas=context.metadatas,
+            )
+        )
+        if not isinstance(encoder_output, FeatureMaps):
+            raise ValueError(
+                "output of underlying FeatureExtractor in SimpleTimeSeries must be a FeatureMaps"
+            )
         all_features = [
             feat_map.reshape(
                 n_batch,
@@ -228,7 +248,7 @@
                 feat_map.shape[2],
                 feat_map.shape[3],
             )
-            for feat_map in
+            for feat_map in encoder_output.feature_maps
         ]
 
         # Groups defaults to flattening all the feature maps.
@@ -284,7 +304,7 @@
                 .permute(0, 3, 1, 2)
             )
         else:
-            raise
+            raise ValueError(f"unknown aggregation op {self.op}")
 
         aggregated_features.append(group_features)
 
@@ -293,4 +313,4 @@
 
         output_features.append(aggregated_features)
 
-        return output_features
+        return FeatureMaps(output_features)
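The SimpleTimeSeries change means the wrapped encoder now runs once over a batch of B*T single-timestep inputs (delivered in a fresh ModelContext) and must return a FeatureMaps, which is then reshaped and aggregated over time. A simplified stand-in for the channel splitting and the op="max" aggregation path follows; the shapes and reshape logic here are illustrative, not copied from _get_batched_images:

```python
import torch

# A time series of T images stacked on the channel axis: (B, T*C, H, W).
b, t, c, h, w = 2, 4, 3, 32, 32
series = torch.randn(b, t * c, h, w)

# Split into one batch of B*T single images, the shape the wrapped
# FeatureExtractor sees.
per_image = series.reshape(b, t, c, h, w).reshape(b * t, c, h, w)

# Pretend encoder output: one feature map per image at 1/4 resolution.
feats = torch.randn(b * t, 64, h // 4, w // 4)

# Temporal max pooling: fold T back out of the batch axis and reduce over it.
pooled = feats.reshape(b, t, 64, h // 4, w // 4).amax(dim=1)
assert pooled.shape == (b, 64, h // 4, w // 4)
```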
rslearn/models/singletask.py
CHANGED
@@ -4,6 +4,10 @@ from typing import Any
 
 import torch
 
+from rslearn.train.model_context import ModelContext, ModelOutput
+
+from .component import FeatureExtractor, IntermediateComponent, Predictor
+
 
 class SingleTaskModel(torch.nn.Module):
     """Standard model wrapper.
@@ -14,38 +18,41 @@ class SingleTaskModel(torch.nn.Module):
     outputs and targets from the last module (which also receives the targets).
     """
 
-    def __init__(
+    def __init__(
+        self,
+        encoder: list[FeatureExtractor | IntermediateComponent],
+        decoder: list[IntermediateComponent | Predictor],
+    ):
         """Initialize a new SingleTaskModel.
 
         Args:
-            encoder: modules to compute intermediate feature representations.
+            encoder: modules to compute intermediate feature representations. The first
+                module must be a FeatureExtractor, and following modules must be
+                IntermediateComponents.
+            decoder: modules to compute outputs and loss. The last module must be a
+                Predictor, while the previous modules must be IntermediateComponents.
         """
         super().__init__()
-        self.encoder = torch.nn.
+        self.encoder = torch.nn.ModuleList(encoder)
         self.decoder = torch.nn.ModuleList(decoder)
 
     def forward(
         self,
-        inputs: list[dict[str, Any]],
+        context: ModelContext,
         targets: list[dict[str, Any]] | None = None,
-    ) ->
+    ) -> ModelOutput:
         """Apply the sequence of modules on the inputs.
 
         Args:
+            context: the model context.
             targets: optional list of target dicts
-            info: optional dictionary of info to pass to the last module
 
         Returns:
+            the model output.
         """
+        cur = self.encoder[0](context)
+        for module in self.encoder[1:]:
+            cur = module(cur, context)
         for module in self.decoder[:-1]:
-            cur = module(cur,
-        return {
-            "outputs": outputs,
-            "loss_dict": loss_dict,
-        }
+            cur = module(cur, context)
+        return self.decoder[-1](cur, context, targets)
|