rslearn 0.0.19__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. rslearn/models/anysat.py +35 -33
  2. rslearn/models/clip.py +5 -2
  3. rslearn/models/croma.py +11 -3
  4. rslearn/models/dinov3.py +2 -1
  5. rslearn/models/faster_rcnn.py +2 -1
  6. rslearn/models/galileo/galileo.py +58 -31
  7. rslearn/models/module_wrapper.py +6 -1
  8. rslearn/models/molmo.py +4 -2
  9. rslearn/models/olmoearth_pretrain/model.py +93 -29
  10. rslearn/models/olmoearth_pretrain/norm.py +5 -3
  11. rslearn/models/panopticon.py +3 -1
  12. rslearn/models/presto/presto.py +45 -15
  13. rslearn/models/prithvi.py +9 -7
  14. rslearn/models/sam2_enc.py +3 -1
  15. rslearn/models/satlaspretrain.py +4 -1
  16. rslearn/models/simple_time_series.py +36 -16
  17. rslearn/models/ssl4eo_s12.py +19 -14
  18. rslearn/models/swin.py +3 -1
  19. rslearn/models/terramind.py +5 -4
  20. rslearn/train/all_patches_dataset.py +34 -14
  21. rslearn/train/dataset.py +66 -10
  22. rslearn/train/model_context.py +35 -1
  23. rslearn/train/tasks/classification.py +8 -2
  24. rslearn/train/tasks/detection.py +3 -2
  25. rslearn/train/tasks/multi_task.py +2 -3
  26. rslearn/train/tasks/per_pixel_regression.py +14 -5
  27. rslearn/train/tasks/regression.py +8 -2
  28. rslearn/train/tasks/segmentation.py +13 -4
  29. rslearn/train/tasks/task.py +2 -2
  30. rslearn/train/transforms/concatenate.py +45 -5
  31. rslearn/train/transforms/crop.py +22 -8
  32. rslearn/train/transforms/flip.py +13 -5
  33. rslearn/train/transforms/mask.py +11 -2
  34. rslearn/train/transforms/normalize.py +46 -15
  35. rslearn/train/transforms/pad.py +15 -3
  36. rslearn/train/transforms/resize.py +18 -9
  37. rslearn/train/transforms/select_bands.py +11 -2
  38. rslearn/train/transforms/sentinel1.py +18 -3
  39. {rslearn-0.0.19.dist-info → rslearn-0.0.20.dist-info}/METADATA +1 -1
  40. {rslearn-0.0.19.dist-info → rslearn-0.0.20.dist-info}/RECORD +45 -45
  41. {rslearn-0.0.19.dist-info → rslearn-0.0.20.dist-info}/WHEEL +0 -0
  42. {rslearn-0.0.19.dist-info → rslearn-0.0.20.dist-info}/entry_points.txt +0 -0
  43. {rslearn-0.0.19.dist-info → rslearn-0.0.20.dist-info}/licenses/LICENSE +0 -0
  44. {rslearn-0.0.19.dist-info → rslearn-0.0.20.dist-info}/licenses/NOTICE +0 -0
  45. {rslearn-0.0.19.dist-info → rslearn-0.0.20.dist-info}/top_level.txt +0 -0
rslearn/models/presto/presto.py CHANGED
@@ -2,6 +2,7 @@
 
 import logging
 import tempfile
+from datetime import datetime
 
 import torch
 from einops import rearrange, repeat
@@ -118,21 +119,21 @@ class Presto(FeatureExtractor):
             of each timestep for that pixel
         """
         bs = [x.shape[0] for x in [s1, s2, era5, srtm] if x is not None]
-        hs = [x.shape[2] for x in [s1, s2, era5, srtm] if x is not None]
-        ws = [x.shape[3] for x in [s1, s2, era5, srtm] if x is not None]
+        ts = [x.shape[2] for x in [s1, s2, era5, srtm] if x is not None]
+        hs = [x.shape[3] for x in [s1, s2, era5, srtm] if x is not None]
+        ws = [x.shape[4] for x in [s1, s2, era5, srtm] if x is not None]
         devices = [x.device for x in [s1, s2, era5, srtm] if x is not None]
 
         assert len(set(bs)) == 1
         assert len(set(hs)) == 1
         assert len(set(ws)) == 1
         assert len(set(devices)) == 1
-        b, h, w, device = bs[0], hs[0], ws[0], devices[0]
-
+        assert len(set(ts)) == 1
+        b, h, w, t, device = bs[0], hs[0], ws[0], ts[0], devices[0]
         # these values will be initialized as
         # we iterate through the data
         x: torch.Tensor | None = None
         mask: torch.Tensor | None = None
-        t: int | None = None
 
         for band_group in [
             (s1, s1_bands),
@@ -146,14 +147,7 @@ class Presto(FeatureExtractor):
             else:
                 continue
 
-            m_t = data.shape[1] // len(input_bands)
-            if t is None:
-                t = m_t
-            else:
-                if t != m_t:
-                    raise ValueError("inconsistent values for t")
-
-            data = rearrange(data, "b (t c) h w -> b t h w c", t=m_t)
+            data = rearrange(data, "b c t h w -> b t h w c")
             if x is None:
                 x = torch.zeros(b, t, h, w, len(INPUT_PRESTO_BANDS), device=device)
             if mask is None:
@@ -184,6 +178,23 @@
         x = (x + PRESTO_ADD_BY.to(device=device)) / PRESTO_DIV_BY.to(device=device)
         return x, mask, dynamic_world.long(), months.long()
 
+    @staticmethod
+    def time_ranges_to_timestamps(
+        time_ranges: list[tuple[datetime, datetime]],
+        device: torch.device,
+    ) -> torch.Tensor:
+        """Turn the time ranges stored in a RasterImage into timestamps accepted by Presto.
+
+        Presto only uses the month associated with each timestamp, so we take the midpoint
+        of the time range. For some inputs (e.g. Sentinel 2) we take an image from a specific
+        time so that start_time == end_time == mid_time.
+        """
+        mid_ranges = [t[0] + ((t[1] - t[0]) / 2) for t in time_ranges]
+        # months are indexed 0-11
+        return torch.tensor(
+            [d.month - 1 for d in mid_ranges], dtype=torch.int32, device=device
+        )
+
     def forward(self, context: ModelContext) -> FeatureMaps:
         """Compute feature maps from the Presto backbone.
 
@@ -194,17 +205,36 @@
             a FeatureMaps with one feature map that is at the same resolution as the
             input (since Presto operates per-pixel).
         """
+        time_modalities = ["s1", "s2", "era5"]
         stacked_inputs = {}
         latlons: torch.Tensor | None = None
+        months: torch.Tensor | None = None
         for key in context.inputs[0].keys():
             # assume all the keys in an input are consistent
             if key in self.input_keys:
                 if key == "latlon":
-                    latlons = torch.stack([inp[key] for inp in context.inputs], dim=0)
+                    latlons = torch.stack(
+                        [inp[key].image for inp in context.inputs], dim=0
+                    )
                 else:
                     stacked_inputs[key] = torch.stack(
-                        [inp[key] for inp in context.inputs], dim=0
+                        [inp[key].image for inp in context.inputs], dim=0
                     )
+                    if key in time_modalities:
+                        if months is None:
+                            if context.inputs[0][key].timestamps is not None:
+                                months = torch.stack(
+                                    [
+                                        self.time_ranges_to_timestamps(
+                                            inp[key].timestamps,  # type: ignore
+                                            device=stacked_inputs[key].device,
+                                        )
+                                        for inp in context.inputs
+                                    ],
+                                    dim=0,
+                                )
+        if months is not None:
+            stacked_inputs["months"] = months
 
         (
             x,
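Presto only consumes the month of each timestamp; the new `time_ranges_to_timestamps` helper takes the midpoint of each (start, end) range and converts it to a 0-indexed month. A standalone sketch of that conversion (the example dates are illustrative, not from the package):

```python
from datetime import datetime

import torch

# Midpoint-of-range month extraction, mirroring the helper added above.
time_ranges = [
    (datetime(2023, 1, 1), datetime(2023, 3, 1)),    # midpoint 2023-01-30 -> month 0
    (datetime(2023, 6, 15), datetime(2023, 6, 15)),  # start == end -> month 5
]
mid_points = [start + (end - start) / 2 for start, end in time_ranges]
months = torch.tensor([d.month - 1 for d in mid_points], dtype=torch.int32)
print(months)  # tensor([0, 5], dtype=torch.int32)
```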
rslearn/models/prithvi.py CHANGED
@@ -144,13 +144,15 @@ class PrithviV2(FeatureExtractor):
         """Process individual modality data.
 
         Args:
-            data: Input tensor of shape [B, C, H, W]
+            data: Input tensor of shape [B, C, T, H, W]
 
         Returns:
-            list of tensors of shape [B, C, H, W]
+            list of tensors of shape [B, C, T, H, W]
         """
         # Get original dimensions
-        original_height = data.shape[2]
+        B, C, T, H, W = data.shape
+        data = rearrange(data, "b c t h w -> b (c t) h w")
+        original_height = H
         new_height = self.patch_size if original_height == 1 else self.image_resolution
         data = F.interpolate(
             data,
@@ -158,6 +160,7 @@
             mode="bilinear",
             align_corners=False,
         )
+        data = rearrange(data, "b (c t) h w -> b c t h w", c=C, t=T)
         return data
 
     def forward(self, context: ModelContext) -> FeatureMaps:
@@ -171,17 +174,16 @@
             a FeatureMaps with one map of shape [B, H/p_s, W/p_s, 11*1024] that contains stacked
             feature maps across the 11 transformer blocks.
         """
-        x = torch.stack([inp[self.INPUT_KEY] for inp in context.inputs], dim=0)
+        # x has shape BCTHW
+        x = torch.stack([inp[self.INPUT_KEY].image for inp in context.inputs], dim=0)
         x = self._resize_data(x)
-        num_timesteps = x.shape[1] // len(self.bands)
-        x = rearrange(x, "b (t c) h w -> b c t h w", t=num_timesteps)
         features = self.model.encoder.forward_features(x)
         # prepare_features_for_image_model was slightly modified since we already
         # know the number of timesteps and don't need to recompute it.
         # in addition we average along the time dimension (instead of concatenating)
         # to keep the embeddings reasonably sized.
         result = self.model.encoder.prepare_features_for_image_model(
-            features, num_timesteps
+            features, x.shape[2]
         )
         return FeatureMaps([torch.cat(result, dim=1)])
 
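`F.interpolate` with `mode="bilinear"` only accepts 4D NCHW input, which is why `_resize_data` now folds the time axis into channels before resizing and unfolds it afterwards. A minimal round-trip sketch (shapes are illustrative):

```python
import torch
import torch.nn.functional as F
from einops import rearrange

x = torch.randn(2, 6, 3, 32, 32)  # B, C, T, H, W
B, C, T, H, W = x.shape

# Fold T into the channel axis so 4D-only bilinear interpolation applies.
flat = rearrange(x, "b c t h w -> b (c t) h w")
flat = F.interpolate(flat, size=(224, 224), mode="bilinear", align_corners=False)

# Restore the separate channel and time axes.
x = rearrange(flat, "b (c t) h w -> b c t h w", c=C, t=T)
print(x.shape)  # torch.Size([2, 6, 3, 224, 224])
```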
rslearn/models/sam2_enc.py CHANGED
@@ -95,7 +95,9 @@ class SAM2Encoder(FeatureExtractor):
         Returns:
             feature maps from the encoder.
         """
-        images = torch.stack([inp["image"] for inp in context.inputs], dim=0)
+        images = torch.stack(
+            [inp["image"].single_ts_to_chw_tensor() for inp in context.inputs], dim=0
+        )
         features = self.encoder(images)
         return FeatureMaps(features)
 
rslearn/models/satlaspretrain.py CHANGED
@@ -76,7 +76,10 @@ class SatlasPretrain(FeatureExtractor):
         Returns:
             multi-resolution feature maps computed by the model.
         """
-        images = torch.stack([inp["image"] for inp in context.inputs], dim=0)
+        # take the first (assumed to be only) timestep
+        images = torch.stack(
+            [inp["image"].single_ts_to_chw_tensor() for inp in context.inputs], dim=0
+        )
         feature_maps = self.model(self.maybe_resize(images))
         return FeatureMaps(feature_maps)
 
rslearn/models/simple_time_series.py CHANGED
@@ -3,8 +3,9 @@
 from typing import Any
 
 import torch
+from einops import rearrange
 
-from rslearn.train.model_context import ModelContext
+from rslearn.train.model_context import ModelContext, RasterImage
 
 from .component import FeatureExtractor, FeatureMaps
 
@@ -163,23 +164,44 @@ class SimpleTimeSeries(FeatureExtractor):
 
     def _get_batched_images(
         self, input_dicts: list[dict[str, Any]], image_key: str, image_channels: int
-    ) -> torch.Tensor:
+    ) -> list[RasterImage]:
         """Collect and reshape images across input dicts.
 
         The BTCHW image time series are reshaped to (B*T)CHW so they can be passed to
         the forward pass of a per-image (unitemporal) model.
         """
         images = torch.stack(
-            [input_dict[image_key] for input_dict in input_dicts], dim=0
+            [input_dict[image_key].image for input_dict in input_dicts], dim=0
+        )  # B, C, T, H, W
+        timestamps = [input_dict[image_key].timestamps for input_dict in input_dicts]
+        # if image_channels is not equal to the actual number of channels, then
+        # every N images should be batched together. For example, if the actual
+        # number of input channels c == 4 and image_channels == 2, then we
+        # want to pass 2 timesteps to the model.
+        # TODO: make this behaviour clearer, but let's leave it like
+        # this for now to not break things.
+        num_timesteps = images.shape[1] // image_channels
+        batched_timesteps = images.shape[2] // num_timesteps
+        images = rearrange(
+            images,
+            "b c (b_t k_t) h w -> (b b_t) c k_t h w",
+            b_t=batched_timesteps,
+            k_t=num_timesteps,
         )
-        n_batch = images.shape[0]
-        n_images = images.shape[1] // image_channels
-        n_height = images.shape[2]
-        n_width = images.shape[3]
-        batched_images = images.reshape(
-            n_batch * n_images, image_channels, n_height, n_width
-        )
-        return batched_images
+        if timestamps[0] is None:
+            new_timestamps = [None] * images.shape[0]
+        else:
+            # we also need to split the timestamps
+            new_timestamps = []
+            for t in timestamps:
+                for i in range(batched_timesteps):
+                    new_timestamps.append(
+                        t[i * num_timesteps : (i + 1) * num_timesteps]
+                    )
+        return [
+            RasterImage(image=image, timestamps=timestamps)
+            for image, timestamps in zip(images, new_timestamps)
+        ]  # C, T, H, W
 
     def forward(
         self,
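The einops pattern in `_get_batched_images` splits the time axis into `b_t` groups of `k_t` timesteps and folds the group axis into the batch, so the wrapped model sees each group as a separate example. A toy illustration with made-up sizes:

```python
import torch
from einops import rearrange

images = torch.randn(2, 4, 6, 8, 8)  # B=2, C=4, T=6, H=8, W=8
k_t = 2                              # timesteps passed to the model per call
b_t = images.shape[2] // k_t         # number of groups folded into the batch

grouped = rearrange(
    images, "b c (b_t k_t) h w -> (b b_t) c k_t h w", b_t=b_t, k_t=k_t
)
print(grouped.shape)  # torch.Size([6, 4, 2, 8, 8])
```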
@@ -208,8 +230,8 @@
 
         if batched_inputs is None:
             batched_inputs = [{} for _ in batched_images]
-            n_images = batched_images.shape[0] // n_batch
-        elif n_images != batched_images.shape[0] // n_batch:
+            n_images = len(batched_images) // n_batch
+        elif n_images != len(batched_images) // n_batch:
             raise ValueError(
                 "expected all modalities to have the same number of timesteps"
             )
@@ -223,10 +245,9 @@
                 context.inputs, self.image_key, self.image_channels
             )
             batched_inputs = [{self.image_key: image} for image in batched_images]
-            n_images = batched_images.shape[0] // n_batch
+            n_images = len(batched_images) // n_batch
         assert n_images is not None
 
-
         # Now we can apply the underlying FeatureExtractor.
         # Its output must be a FeatureMaps.
         assert batched_inputs is not None
@@ -250,7 +271,6 @@
             )
             for feat_map in encoder_output.feature_maps
         ]
-
         # Groups defaults to flattening all the feature maps.
         groups = self.groups
         if not groups:
rslearn/models/ssl4eo_s12.py CHANGED
@@ -13,7 +13,7 @@ class Ssl4eoS12(FeatureExtractor):
 
     def __init__(
         self,
-        backbone_ckpt_path: str,
+        backbone_ckpt_path: str | None,
         arch: str = "resnet50",
         output_layers: list[int] = [0, 1, 2, 3],
     ) -> None:
@@ -39,19 +39,22 @@
         else:
             raise ValueError(f"unknown SSL4EO-S12 architecture {arch}")
 
-        state_dict = torch.load(backbone_ckpt_path, weights_only=True)
-        state_dict = state_dict["teacher"]
-        prefix = "module.backbone."
-        state_dict = {
-            k[len(prefix) :]: v for k, v in state_dict.items() if k.startswith(prefix)
-        }
-        missing_keys, unexpected_keys = self.model.load_state_dict(
-            state_dict, strict=False
-        )
-        if missing_keys or unexpected_keys:
-            print(
-                f"warning: got missing_keys={missing_keys}, unexpected_keys={unexpected_keys} when loading SSL4EO-S12 state dict"
+        if backbone_ckpt_path is not None:
+            state_dict = torch.load(backbone_ckpt_path, weights_only=True)
+            state_dict = state_dict["teacher"]
+            prefix = "module.backbone."
+            state_dict = {
+                k[len(prefix) :]: v
+                for k, v in state_dict.items()
+                if k.startswith(prefix)
+            }
+            missing_keys, unexpected_keys = self.model.load_state_dict(
+                state_dict, strict=False
             )
+            if missing_keys or unexpected_keys:
+                print(
+                    f"warning: got missing_keys={missing_keys}, unexpected_keys={unexpected_keys} when loading SSL4EO-S12 state dict"
+                )
 
     def get_backbone_channels(self) -> list[tuple[int, int]]:
         """Returns the output channels of this model when used as a backbone.
@@ -91,7 +94,9 @@
         Returns:
             feature maps computed by the pre-trained model.
         """
-        x = torch.stack([inp["image"] for inp in context.inputs], dim=0)
+        x = torch.stack(
+            [inp["image"].single_ts_to_chw_tensor() for inp in context.inputs], dim=0
+        )
         x = self.model.conv1(x)
         x = self.model.bn1(x)
         x = self.model.relu(x)
rslearn/models/swin.py CHANGED
@@ -151,7 +151,9 @@ class Swin(FeatureExtractor):
             a FeatureVector if the configured output_layers is None, or a FeatureMaps
             otherwise containing one feature map per configured output layer.
         """
-        images = torch.stack([inp["image"] for inp in context.inputs], dim=0)
+        images = torch.stack(
+            [inp["image"].single_ts_to_chw_tensor() for inp in context.inputs], dim=0
+        )
 
         if self.output_layers:
             layer_features = []
rslearn/models/terramind.py CHANGED
@@ -143,7 +143,8 @@ class Terramind(FeatureExtractor):
             if modality not in context.inputs[0]:
                 continue
             cur = torch.stack(
-                [inp[modality] for inp in context.inputs], dim=0
+                [inp[modality].single_ts_to_chw_tensor() for inp in context.inputs],
+                dim=0,
             )  # (B, C, H, W)
             if self.do_resizing and (
                 cur.shape[2] != IMAGE_SIZE or cur.shape[3] != IMAGE_SIZE
@@ -219,7 +220,7 @@ class TerramindNormalize(Transform):
         Returns:
             The normalized image.
         """
-        images = image.float()  # (C, H, W)
+        images = image.float()  # (C, 1, H, W)
         if images.shape[0] % len(means) != 0:
             raise ValueError(
                 f"the number of image channels {images.shape[0]} is not multiple of expected number of bands {len(means)}"
@@ -247,8 +248,8 @@
             band_info = PRETRAINED_BANDS[modality]
             means = [band_info[band][0] for band in band_info]
             stds = [band_info[band][1] for band in band_info]
-            input_dict[modality] = self.apply_image(
-                input_dict[modality],
+            input_dict[modality].image = self.apply_image(
+                input_dict[modality].image,
                 means,
                 stds,
             )
rslearn/train/all_patches_dataset.py CHANGED
@@ -10,7 +10,7 @@ import torch
 
 from rslearn.dataset import Window
 from rslearn.train.dataset import DataInput, ModelDataset
-from rslearn.train.model_context import SampleMetadata
+from rslearn.train.model_context import RasterImage, SampleMetadata
 from rslearn.utils.geometry import PixelBounds, STGeometry
 
 
@@ -99,6 +99,30 @@
     return raw_inputs, passthrough_inputs
 
 
+def crop_tensor_or_rasterimage(
+    x: torch.Tensor | RasterImage, start: tuple[int, int], end: tuple[int, int]
+) -> torch.Tensor | RasterImage:
+    """Crop a tensor or a RasterImage."""
+    if isinstance(x, torch.Tensor):
+        # Crop the CHW tensor with scaled coordinates.
+        return x[
+            :,
+            start[1] : end[1],
+            start[0] : end[0],
+        ].clone()
+    else:
+        # Crop the CTHW tensor with scaled coordinates.
+        return RasterImage(
+            x.image[
+                :,
+                :,
+                start[1] : end[1],
+                start[0] : end[0],
+            ].clone(),
+            x.timestamps,
+        )
+
+
 class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
     """This wraps a ModelDataset to iterate over all patches in that dataset.
 
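A quick usage sketch of the new helper, assuming rslearn 0.0.20 is installed (shapes and coordinates are illustrative):

```python
import torch

from rslearn.train.all_patches_dataset import crop_tensor_or_rasterimage
from rslearn.train.model_context import RasterImage

chw = torch.zeros(3, 64, 64)                   # plain CHW tensor
cthw = RasterImage(torch.zeros(3, 2, 64, 64))  # CTHW RasterImage with 2 timesteps

# start=(x, y) and end=(x, y): the helper slices H with the y range, W with x.
print(crop_tensor_or_rasterimage(chw, (0, 0), (32, 32)).shape)         # (3, 32, 32)
print(crop_tensor_or_rasterimage(cthw, (0, 0), (32, 32)).image.shape)  # (3, 2, 32, 32)
```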
@@ -281,7 +305,7 @@ class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
         def crop_input_dict(d: dict[str, Any]) -> dict[str, Any]:
             cropped = {}
             for input_name, value in d.items():
-                if isinstance(value, torch.Tensor):
+                if isinstance(value, torch.Tensor | RasterImage):
                     # Get resolution scale for this input
                     rf = self.inputs[input_name].resolution_factor
                     scale = rf.numerator / rf.denominator
@@ -294,12 +318,9 @@
                         int(end_offset[0] * scale),
                         int(end_offset[1] * scale),
                     )
-                    # Crop the CHW tensor with scaled coordinates.
-                    cropped[input_name] = value[
-                        :,
-                        scaled_start[1] : scaled_end[1],
-                        scaled_start[0] : scaled_end[0],
-                    ].clone()
+                    cropped[input_name] = crop_tensor_or_rasterimage(
+                        value, scaled_start, scaled_end
+                    )
                 elif isinstance(value, list):
                     cropped[input_name] = [
                         feat
@@ -429,7 +450,7 @@ class InMemoryAllPatchesDataset(torch.utils.data.Dataset):
         """
         cropped = {}
         for input_name, value in d.items():
-            if isinstance(value, torch.Tensor):
+            if isinstance(value, torch.Tensor | RasterImage):
                 # Get resolution scale for this input
                 rf = self.inputs[input_name].resolution_factor
                 scale = rf.numerator / rf.denominator
@@ -442,11 +463,10 @@
                     int(end_offset[0] * scale),
                     int(end_offset[1] * scale),
                 )
-                cropped[input_name] = value[
-                    :,
-                    scaled_start[1] : scaled_end[1],
-                    scaled_start[0] : scaled_end[0],
-                ].clone()
+                cropped[input_name] = crop_tensor_or_rasterimage(
+                    value, scaled_start, scaled_end
+                )
+
 
             elif isinstance(value, list):
                 cropped[input_name] = [
                     feat for feat in value if cur_geom.intersects(feat.geometry)
rslearn/train/dataset.py CHANGED
@@ -8,6 +8,7 @@ import random
 import tempfile
 import time
 import uuid
+from datetime import datetime
 from typing import Any
 
 import torch
@@ -19,10 +20,16 @@ from rslearn.config import (
     DType,
     LayerConfig,
 )
+from rslearn.data_sources.data_source import Item
 from rslearn.dataset.dataset import Dataset
 from rslearn.dataset.storage.file import FileWindowStorage
-from rslearn.dataset.window import Window, get_layer_and_group_from_dir_name
+from rslearn.dataset.window import (
+    Window,
+    WindowLayerData,
+    get_layer_and_group_from_dir_name,
+)
 from rslearn.log_utils import get_logger
+from rslearn.train.model_context import RasterImage
 from rslearn.utils.feature import Feature
 from rslearn.utils.geometry import PixelBounds, ResolutionFactor
 from rslearn.utils.mp import star_imap_unordered
@@ -198,7 +205,8 @@ def read_raster_layer_for_data_input(
     group_idx: int,
     layer_config: LayerConfig,
     data_input: DataInput,
-) -> torch.Tensor:
+    layer_data: WindowLayerData | None,
+) -> tuple[torch.Tensor, tuple[datetime, datetime] | None]:
     """Read a raster layer for a DataInput.
 
     This scans the available rasters for the layer at the window to determine which
@@ -211,9 +219,11 @@
         group_idx: the item group.
         layer_config: the layer configuration.
         data_input: the DataInput that specifies the bands and dtype.
+        layer_data: the WindowLayerData associated with this layer and window.
 
     Returns:
-        tensor containing raster data.
+        a tuple of the tensor containing raster data and the time range
+        associated with that data.
     """
     # See what different sets of bands we need to read to get all the
     # configured bands.
@@ -284,7 +294,34 @@
             src[src_indexes, :, :].astype(data_input.dtype.get_numpy_dtype())
         )
 
-    return image
+    # add the timestamp. this is a tuple defining the start and end of the time range.
+    time_range = None
+    if layer_data is not None:
+        item = Item.deserialize(layer_data.serialized_item_groups[group_idx][0])
+        if item.geometry.time_range is not None:
+            # we assume if one layer data has a geometry & time range, all of them do
+            time_ranges = [
+                (
+                    datetime.fromisoformat(
+                        Item.deserialize(
+                            layer_data.serialized_item_groups[group_idx][idx]
+                        ).geometry.time_range[0]  # type: ignore
+                    ),
+                    datetime.fromisoformat(
+                        Item.deserialize(
+                            layer_data.serialized_item_groups[group_idx][idx]
+                        ).geometry.time_range[1]  # type: ignore
+                    ),
+                )
+                for idx in range(len(layer_data.serialized_item_groups[group_idx]))
+            ]
+            # take the min and max
+            time_range = (
+                min([t[0] for t in time_ranges]),
+                max([t[1] for t in time_ranges]),
+            )
+
+    return image, time_range
 
 
 def read_data_input(
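The aggregation above reduces the per-item time ranges to a single range covering all items (earliest start, latest end). Standalone, with illustrative dates:

```python
from datetime import datetime

time_ranges = [
    (datetime(2023, 3, 1), datetime(2023, 3, 10)),
    (datetime(2023, 2, 20), datetime(2023, 3, 5)),
]
# Covering range: earliest start to latest end.
time_range = (min(t[0] for t in time_ranges), max(t[1] for t in time_ranges))
print(time_range)  # 2023-02-20 00:00 through 2023-03-10 00:00
```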
@@ -293,7 +330,7 @@
     bounds: PixelBounds,
     data_input: DataInput,
     rng: random.Random,
-) -> torch.Tensor | list[Feature]:
+) -> RasterImage | list[Feature]:
     """Read the data specified by the DataInput from the window.
 
     Args:
@@ -335,15 +372,34 @@
         layers_to_read = [rng.choice(layer_options)]
 
     if data_input.data_type == "raster":
+        # load it once here
+        layer_datas = window.load_layer_datas()
         images: list[torch.Tensor] = []
+        time_ranges: list[tuple[datetime, datetime] | None] = []
         for layer_name, group_idx in layers_to_read:
             layer_config = dataset.layers[layer_name]
-            images.append(
-                read_raster_layer_for_data_input(
-                    window, bounds, layer_name, group_idx, layer_config, data_input
-                )
+            image, time_range = read_raster_layer_for_data_input(
+                window,
+                bounds,
+                layer_name,
+                group_idx,
+                layer_config,
+                data_input,
+                # some layers (e.g. "label_raster") won't have associated
+                # layer datas
+                layer_datas[layer_name] if layer_name in layer_datas else None,
             )
-        return torch.cat(images, dim=0)
+            if len(time_ranges) > 0:
+                if type(time_ranges[-1]) is not type(time_range):
+                    raise ValueError(
+                        f"All time ranges should be datetime tuples or None. Got {type(time_range)} and {type(time_ranges[-1])}"
+                    )
+            images.append(image)
+            time_ranges.append(time_range)
+        return RasterImage(
+            torch.stack(images, dim=1),
+            time_ranges if time_ranges[0] is not None else None,  # type: ignore
+        )
 
     elif data_input.data_type == "vector":
         # We don't really support time series for vector data currently, we just
rslearn/train/model_context.py CHANGED
@@ -10,6 +10,40 @@ import torch
 from rslearn.utils.geometry import PixelBounds, Projection
 
 
+@dataclass
+class RasterImage:
+    """A raster image is a torch.Tensor of images plus their associated timestamps."""
+
+    # image is a 4D CTHW tensor
+    image: torch.Tensor
+    # if timestamps is not None, len(timestamps) must match the T dimension of the tensor
+    timestamps: list[tuple[datetime, datetime]] | None = None
+
+    @property
+    def shape(self) -> torch.Size:
+        """The shape of the image."""
+        return self.image.shape
+
+    def dim(self) -> int:
+        """The number of dimensions of the image."""
+        return self.image.dim()
+
+    @property
+    def dtype(self) -> torch.dtype:
+        """The image dtype."""
+        return self.image.dtype
+
+    def single_ts_to_chw_tensor(self) -> torch.Tensor:
+        """Single-timestep models expect single-timestep inputs.
+
+        This function (1) checks that this raster image only has 1 timestep and
+        (2) returns the tensor for that (single) timestep (going from CTHW to CHW).
+        """
+        if self.image.shape[1] != 1:
+            raise ValueError(f"Expected a single timestep, got {self.image.shape[1]}")
+        return self.image[:, 0]
+
+
 @dataclass
 class SampleMetadata:
     """Metadata pertaining to an example."""
@@ -32,7 +66,7 @@ class ModelContext:
     """Context to pass to all model components."""
 
     # One input dict per example in the batch.
-    inputs: list[dict[str, torch.Tensor]]
+    inputs: list[dict[str, torch.Tensor | RasterImage]]
     # One SampleMetadata per example in the batch.
     metadatas: list[SampleMetadata]
     # Arbitrary dict that components can add to.
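To illustrate the new container, a hypothetical construction showing the invariants the dataclass documents (band counts and dates are made up):

```python
from datetime import datetime

import torch

from rslearn.train.model_context import RasterImage

# Two timesteps with (start, end) ranges; len(timestamps) matches T.
img = RasterImage(
    image=torch.zeros(12, 2, 256, 256),  # C, T, H, W
    timestamps=[
        (datetime(2023, 6, 1), datetime(2023, 6, 1)),
        (datetime(2023, 7, 1), datetime(2023, 7, 1)),
    ],
)
print(img.shape, img.dtype)  # torch.Size([12, 2, 256, 256]) torch.float32

# single_ts_to_chw_tensor() drops the time axis, raising unless T == 1.
single = RasterImage(torch.zeros(3, 1, 64, 64))
print(single.single_ts_to_chw_tensor().shape)  # torch.Size([3, 64, 64])
```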