rslearn 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rslearn/arg_parser.py +2 -9
- rslearn/config/dataset.py +15 -16
- rslearn/dataset/dataset.py +28 -22
- rslearn/lightning_cli.py +22 -11
- rslearn/main.py +1 -1
- rslearn/models/anysat.py +35 -33
- rslearn/models/attention_pooling.py +177 -0
- rslearn/models/clip.py +5 -2
- rslearn/models/component.py +12 -0
- rslearn/models/croma.py +11 -3
- rslearn/models/dinov3.py +2 -1
- rslearn/models/faster_rcnn.py +2 -1
- rslearn/models/galileo/galileo.py +58 -31
- rslearn/models/module_wrapper.py +6 -1
- rslearn/models/molmo.py +4 -2
- rslearn/models/olmoearth_pretrain/model.py +206 -51
- rslearn/models/olmoearth_pretrain/norm.py +5 -3
- rslearn/models/panopticon.py +3 -1
- rslearn/models/presto/presto.py +45 -15
- rslearn/models/prithvi.py +9 -7
- rslearn/models/sam2_enc.py +3 -1
- rslearn/models/satlaspretrain.py +4 -1
- rslearn/models/simple_time_series.py +43 -17
- rslearn/models/ssl4eo_s12.py +19 -14
- rslearn/models/swin.py +3 -1
- rslearn/models/terramind.py +5 -4
- rslearn/train/all_patches_dataset.py +96 -28
- rslearn/train/dataset.py +102 -53
- rslearn/train/model_context.py +35 -1
- rslearn/train/scheduler.py +15 -0
- rslearn/train/tasks/classification.py +8 -2
- rslearn/train/tasks/detection.py +3 -2
- rslearn/train/tasks/multi_task.py +2 -3
- rslearn/train/tasks/per_pixel_regression.py +14 -5
- rslearn/train/tasks/regression.py +8 -2
- rslearn/train/tasks/segmentation.py +13 -4
- rslearn/train/tasks/task.py +2 -2
- rslearn/train/transforms/concatenate.py +45 -5
- rslearn/train/transforms/crop.py +22 -8
- rslearn/train/transforms/flip.py +13 -5
- rslearn/train/transforms/mask.py +11 -2
- rslearn/train/transforms/normalize.py +46 -15
- rslearn/train/transforms/pad.py +15 -3
- rslearn/train/transforms/resize.py +83 -0
- rslearn/train/transforms/select_bands.py +11 -2
- rslearn/train/transforms/sentinel1.py +18 -3
- rslearn/utils/geometry.py +73 -0
- rslearn/utils/jsonargparse.py +66 -0
- {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/METADATA +1 -1
- {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/RECORD +55 -53
- {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/WHEEL +0 -0
- {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/entry_points.txt +0 -0
- {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/licenses/LICENSE +0 -0
- {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/licenses/NOTICE +0 -0
- {rslearn-0.0.18.dist-info → rslearn-0.0.20.dist-info}/top_level.txt +0 -0
rslearn/arg_parser.py
CHANGED

@@ -1,6 +1,5 @@
 """Custom Lightning ArgumentParser with environment variable substitution support."""
 
-import os
 from typing import Any
 
 from jsonargparse import Namespace
@@ -21,11 +20,7 @@ class RslearnArgumentParser(LightningArgumentParser):
     def parse_string(
         self,
         cfg_str: str,
-        cfg_path: str = "",
-        ext_vars: dict | None = None,
-        env: bool | None = None,
-        defaults: bool = True,
-        with_meta: bool | None = None,
+        *args: Any,
         **kwargs: Any,
     ) -> Namespace:
         """Pre-processes string for environment variable substitution before parsing."""
@@ -33,6 +28,4 @@ class RslearnArgumentParser(LightningArgumentParser):
         substituted_cfg_str = substitute_env_vars_in_string(cfg_str)
 
         # Call the parent method with the substituted config
-        return super().parse_string(
-            substituted_cfg_str, cfg_path, ext_vars, env, defaults, with_meta, **kwargs
-        )
+        return super().parse_string(substituted_cfg_str, *args, **kwargs)
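A usage sketch (not part of the package diff): the override now rewrites only cfg_str and forwards any remaining positional or keyword arguments straight to the parent parser. The ${VAR} substitution syntax and the argument names below are assumptions for illustration, not documented behavior of substitute_env_vars_in_string.

    import os

    from rslearn.arg_parser import RslearnArgumentParser

    os.environ["RSLEARN_DATA_ROOT"] = "/data/my_dataset"  # value to be substituted

    parser = RslearnArgumentParser()
    parser.add_argument("--data_root", type=str)

    # defaults=True is passed through unchanged via **kwargs to the parent parse_string.
    cfg = parser.parse_string("data_root: ${RSLEARN_DATA_ROOT}\n", defaults=True)
    print(cfg.data_root)  # "/data/my_dataset", if the assumed ${VAR} syntax is correct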
rslearn/config/dataset.py
CHANGED

@@ -25,7 +25,7 @@ from rasterio.enums import Resampling
 from upath import UPath
 
 from rslearn.log_utils import get_logger
-from rslearn.utils import PixelBounds, Projection
+from rslearn.utils.geometry import PixelBounds, Projection, ResolutionFactor
 from rslearn.utils.raster_format import RasterFormat
 from rslearn.utils.vector_format import VectorFormat
 
@@ -215,22 +215,12 @@
         Returns:
             tuple of updated projection and bounds with zoom offset applied
         """
-        if self.zoom_offset
-
-        projection = Projection(
-            projection.crs,
-            projection.x_resolution / (2**self.zoom_offset),
-            projection.y_resolution / (2**self.zoom_offset),
-        )
-        if self.zoom_offset > 0:
-            zoom_factor = 2**self.zoom_offset
-            bounds = tuple(x * zoom_factor for x in bounds)  # type: ignore
+        if self.zoom_offset >= 0:
+            factor = ResolutionFactor(numerator=2**self.zoom_offset)
         else:
-
-
-
-            )
-        return projection, bounds
+            factor = ResolutionFactor(denominator=2 ** (-self.zoom_offset))
+
+        return (factor.multiply_projection(projection), factor.multiply_bounds(bounds))
 
     @field_validator("format", mode="before")
     @classmethod
@@ -645,3 +635,12 @@
         default_factory=lambda: StorageConfig(),
         description="jsonargparse configuration for the WindowStorageFactory.",
     )
+
+    @field_validator("layers", mode="after")
+    @classmethod
+    def layer_names_validator(cls, v: dict[str, LayerConfig]) -> dict[str, LayerConfig]:
+        """Ensure layer names don't contain periods, since we use periods to distinguish different materialized groups within a layer."""
+        for layer_name in v.keys():
+            if "." in layer_name:
+                raise ValueError(f"layer names must not contain periods: {layer_name}")
+        return v
rslearn/dataset/dataset.py
CHANGED

@@ -23,7 +23,7 @@ class Dataset:
     .. code-block:: none
 
         dataset/
-            config.json
+            config.json  # optional, if config provided as runtime object
             windows/
                 group1/
                     epsg:3857_10_623565_1528020/
@@ -40,37 +40,43 @@
     materialize.
     """
 
-    def __init__(self, path: UPath, disabled_layers: list[str] = []) -> None:
+    def __init__(
+        self,
+        path: UPath,
+        disabled_layers: list[str] = [],
+        dataset_config: DatasetConfig | None = None,
+    ) -> None:
         """Initializes a new Dataset.
 
         Args:
            path: the root directory of the dataset
            disabled_layers: list of layers to disable
+           dataset_config: optional dataset configuration to use instead of loading from the dataset directory
        """
        self.path = path
 
-
-
-
-
-
-
-
-        for layer_name, layer_config in config.layers.items():
-            # Layer names must not contain period, since we use period to
-            # distinguish different materialized groups within a layer.
-            assert "." not in layer_name, "layer names must not contain periods"
-            if layer_name in disabled_layers:
-                logger.warning(f"Layer {layer_name} is disabled")
-                continue
-            self.layers[layer_name] = layer_config
-
-        self.tile_store_config = config.tile_store
-        self.storage = (
-            config.storage.instantiate_window_storage_factory().get_storage(
-                self.path
+        if dataset_config is None:
+            # Load dataset configuration from the dataset directory.
+            with (self.path / "config.json").open("r") as f:
+                config_content = f.read()
+            config_content = substitute_env_vars_in_string(config_content)
+            dataset_config = DatasetConfig.model_validate(
+                json.loads(config_content)
             )
+
+        self.layers = {}
+        for layer_name, layer_config in dataset_config.layers.items():
+            if layer_name in disabled_layers:
+                logger.warning(f"Layer {layer_name} is disabled")
+                continue
+            self.layers[layer_name] = layer_config
+
+        self.tile_store_config = dataset_config.tile_store
+        self.storage = (
+            dataset_config.storage.instantiate_window_storage_factory().get_storage(
+                self.path
            )
+        )
 
     def load_windows(
         self,
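A construction sketch (hypothetical usage; the import path and the DatasetConfig contents are assumptions): the constructor either reads <path>/config.json as before, or accepts an already-validated DatasetConfig and skips the read entirely.

    from upath import UPath

    from rslearn.dataset import Dataset  # import path assumed

    # 1) Load configuration from <path>/config.json (previous behavior).
    ds = Dataset(UPath("/data/my_dataset"))

    # 2) Pass a DatasetConfig built at runtime; config.json need not exist on disk.
    # config = DatasetConfig.model_validate(config_dict)  # config_dict built elsewhere
    # ds = Dataset(UPath("/data/my_dataset"), dataset_config=config)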
rslearn/lightning_cli.py
CHANGED

@@ -21,6 +21,7 @@ from rslearn.log_utils import get_logger
 from rslearn.train.data_module import RslearnDataModule
 from rslearn.train.lightning_module import RslearnLightningModule
 from rslearn.utils.fsspec import open_atomic
+from rslearn.utils.jsonargparse import init_jsonargparse
 
 WANDB_ID_FNAME = "wandb_id"
 
@@ -390,8 +391,15 @@
 
         Sets the dataset path for any configured RslearnPredictionWriter callbacks.
         """
-        subcommand = self.config.subcommand
-        c = self.config[subcommand]
+        if not hasattr(self.config, "subcommand"):
+            logger.warning(
+                "Config does not have subcommand attribute, assuming we are in run=False mode"
+            )
+            subcommand = None
+            c = self.config
+        else:
+            subcommand = self.config.subcommand
+            c = self.config[subcommand]
 
         # If there is a RslearnPredictionWriter, set its path.
         prediction_writer_callback = None
@@ -415,16 +423,17 @@
         if subcommand == "predict":
             c.return_predictions = False
 
-        #
+        # Default to DDP with find_unused_parameters. Likely won't get called with unified config
         if subcommand == "fit":
-            c.trainer.strategy
-
-
-
-
-
-
-
+            if not c.trainer.strategy:
+                c.trainer.strategy = jsonargparse.Namespace(
+                    {
+                        "class_path": "lightning.pytorch.strategies.DDPStrategy",
+                        "init_args": jsonargparse.Namespace(
+                            {"find_unused_parameters": True}
+                        ),
+                    }
+                )
 
         if c.management_dir:
             self.enable_project_management(c.management_dir)
@@ -432,6 +441,8 @@
 
 def model_handler() -> None:
     """Handler for any rslearn model X commands."""
+    init_jsonargparse()
+
     RslearnLightningCLI(
         model_class=RslearnLightningModule,
         datamodule_class=RslearnDataModule,
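The default injected above is roughly equivalent to configuring Lightning directly as in the sketch below (not code from the package); supplying any trainer.strategy in the user config suppresses the default.

    from lightning.pytorch import Trainer
    from lightning.pytorch.strategies import DDPStrategy

    # What the injected jsonargparse Namespace resolves to after instantiation.
    trainer = Trainer(strategy=DDPStrategy(find_unused_parameters=True))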
rslearn/main.py
CHANGED

@@ -380,7 +380,7 @@ def apply_on_windows(
 
 def apply_on_windows_args(f: Callable[..., Any], args: argparse.Namespace) -> None:
     """Call apply_on_windows with arguments passed via command-line interface."""
-    dataset = Dataset(UPath(args.root), args.disabled_layers)
+    dataset = Dataset(UPath(args.root), disabled_layers=args.disabled_layers)
     apply_on_windows(
         f=f,
         dataset=dataset,
rslearn/models/anysat.py
CHANGED

@@ -4,6 +4,8 @@ This code loads the AnySat model from torch hub. See
 https://github.com/gastruc/AnySat for applicable license and copyright information.
 """
 
+from datetime import datetime
+
 import torch
 from einops import rearrange
 
@@ -53,7 +55,6 @@ class AnySat(FeatureExtractor):
         self,
         modalities: list[str],
         patch_size_meters: int,
-        dates: dict[str, list[int]],
         output: str = "patch",
         output_modality: str | None = None,
         hub_repo: str = "gastruc/anysat",
@@ -85,14 +86,6 @@ class AnySat(FeatureExtractor):
             if m not in MODALITY_RESOLUTIONS:
                 raise ValueError(f"Invalid modality: {m}")
 
-        if not all(m in TIME_SERIES_MODALITIES for m in dates.keys()):
-            raise ValueError("`dates` keys must be time-series modalities only.")
-        for m in modalities:
-            if m in TIME_SERIES_MODALITIES and m not in dates:
-                raise ValueError(
-                    f"Missing required dates for time-series modality '{m}'."
-                )
-
         if patch_size_meters % 10 != 0:
             raise ValueError(
                 "In AnySat, `patch_size` is in meters and must be a multiple of 10."
@@ -106,7 +99,6 @@ class AnySat(FeatureExtractor):
 
         self.modalities = modalities
         self.patch_size_meters = int(patch_size_meters)
-        self.dates = dates
         self.output = output
         self.output_modality = output_modality
 
@@ -119,6 +111,20 @@ class AnySat(FeatureExtractor):
         )
         self._embed_dim = 768  # base width, 'dense' returns 2x
 
+    @staticmethod
+    def time_ranges_to_doy(
+        time_ranges: list[tuple[datetime, datetime]],
+        device: torch.device,
+    ) -> torch.Tensor:
+        """Turn the time ranges stored in a RasterImage to timestamps accepted by AnySat.
+
+        AnySat uses the doy with each timestamp, so we take the midpoint
+        the time range. For some inputs (e.g. Sentinel 2) we take an image from a specific
+        time so that start_time == end_time == mid_time.
+        """
+        doys = [(t[0] + ((t[1] - t[0]) / 2)).timetuple().tm_yday for t in time_ranges]
+        return torch.tensor(doys, dtype=torch.int32, device=device)
+
     def forward(self, context: ModelContext) -> FeatureMaps:
         """Forward pass for the AnySat model.
 
@@ -139,17 +145,29 @@ class AnySat(FeatureExtractor):
                 raise ValueError(f"Modality '{modality}' not present in inputs.")
 
             cur = torch.stack(
-                [inp[modality] for inp in inputs], dim=0
-            )  # (B, C,
+                [inp[modality].image for inp in inputs], dim=0
+            )  # (B, C, T, H, W)
 
             if modality in TIME_SERIES_MODALITIES:
-
-
-                cur = rearrange(
-                    cur, "b (t c) h w -> b t c h w", t=num_dates, c=num_bands
-                )
+                num_bands = cur.shape[1]
+                cur = rearrange(cur, "b c t h w -> b t c h w")
                 H, W = cur.shape[-2], cur.shape[-1]
+
+                if inputs[0][modality].timestamps is None:
+                    raise ValueError(
+                        f"Require timestamps for time series modality {modality}"
+                    )
+                timestamps = torch.stack(
+                    [
+                        self.time_ranges_to_doy(inp[modality].timestamps, cur.device)  # type: ignore
+                        for inp in inputs
+                    ],
+                    dim=0,
+                )
+                batch[f"{modality}_dates"] = timestamps
             else:
+                # take the first (assumed only) timestep
+                cur = cur[:, :, 0]
                 num_bands = cur.shape[1]
                 H, W = cur.shape[-2], cur.shape[-1]
 
@@ -173,22 +191,6 @@ class AnySat(FeatureExtractor):
                 "All modalities must share the same spatial extent (H*res, W*res)."
             )
 
-        # Add *_dates
-        to_add = {}
-        for modality, x in list(batch.items()):
-            if modality in TIME_SERIES_MODALITIES:
-                B, T = x.shape[0], x.shape[1]
-                d = torch.as_tensor(
-                    self.dates[modality], dtype=torch.long, device=x.device
-                )
-                if d.ndim != 1 or d.numel() != T:
-                    raise ValueError(
-                        f"dates for '{modality}' must be 1D length {T}, got {tuple(d.shape)}"
-                    )
-                to_add[f"{modality}_dates"] = d.unsqueeze(0).repeat(B, 1)
-
-        batch.update(to_add)
-
         kwargs = {"patch_size": self.patch_size_meters, "output": self.output}
         if self.output == "dense":
             kwargs["output_modality"] = self.output_modality
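The added day-of-year conversion reduces to taking the midpoint of each (start, end) range; a plain-Python illustration (standard library only, no rslearn imports):

    from datetime import datetime

    start = datetime(2023, 6, 1)
    end = datetime(2023, 6, 11)
    mid = start + (end - start) / 2          # June 6, the midpoint of the range
    print(mid.timetuple().tm_yday)           # 157, the day-of-year fed to AnySat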
rslearn/models/attention_pooling.py
ADDED

@@ -0,0 +1,177 @@
+"""An attention pooling layer."""
+
+import math
+from typing import Any
+
+import torch
+import torch.nn.functional as F
+from einops import rearrange
+from torch import nn
+
+from rslearn.models.component import (
+    FeatureMaps,
+    IntermediateComponent,
+    TokenFeatureMaps,
+)
+from rslearn.train.model_context import ModelContext
+
+
+class SimpleAttentionPool(IntermediateComponent):
+    """Simple Attention Pooling.
+
+    Given a token feature map of shape BCHWN,
+    learn an attention layer which aggregates over
+    the N dimension.
+
+    This is done simply by learning a mapping D->1 which is the weight
+    which should be assigned to each token during averaging:
+
+    output = sum [feat_token * W(feat_token) for feat_token in feat_tokens]
+    """
+
+    def __init__(self, in_dim: int, hidden_linear: bool = False) -> None:
+        """Initialize the simple attention pooling layer.
+
+        Args:
+            in_dim: the encoding dimension D
+            hidden_linear: whether to apply an additional linear transformation D -> D
+                to the feat tokens. If this is True, a ReLU activation is applied
+                after the first linear transformation.
+        """
+        super().__init__()
+        if hidden_linear:
+            self.hidden_linear = nn.Linear(in_features=in_dim, out_features=in_dim)
+        else:
+            self.hidden_linear = None
+        self.linear = nn.Linear(in_features=in_dim, out_features=1)
+
+    def forward_for_map(self, feat_tokens: torch.Tensor) -> torch.Tensor:
+        """Attention pooling for a single feature map (BCHWN tensor)."""
+        B, D, H, W, N = feat_tokens.shape
+        feat_tokens = rearrange(feat_tokens, "b d h w n -> (b h w) n d")
+        if self.hidden_linear is not None:
+            feat_tokens = torch.nn.functional.relu(self.hidden_linear(feat_tokens))
+        attention_scores = torch.nn.functional.softmax(self.linear(feat_tokens), dim=1)
+        feat_tokens = (attention_scores * feat_tokens).sum(dim=1)
+        return rearrange(feat_tokens, "(b h w) d -> b d h w", b=B, h=H, w=W)
+
+    def forward(self, intermediates: Any, context: ModelContext) -> FeatureMaps:
+        """Forward pass for attention pooling linear probe.
+
+        Args:
+            intermediates: the output from the previous component, which must be a TokenFeatureMaps.
+                We pool over the final dimension in the TokenFeatureMaps. If multiple maps
+                are passed, we apply the same linear layers to all of them.
+            context: the model context.
+            feat_tokens (torch.Tensor): Input feature tokens of shape (B, C, H, W, N).
+
+        Returns:
+            torch.Tensor:
+                - output, attentioned pool over the last dimension (B, C, H, W)
+        """
+        if not isinstance(intermediates, TokenFeatureMaps):
+            raise ValueError("input to Attention Pool must be a TokenFeatureMaps")
+
+        features = []
+        for feat in intermediates.feature_maps:
+            features.append(self.forward_for_map(feat))
+        return FeatureMaps(features)
+
+
+class AttentionPool(IntermediateComponent):
+    """Attention Pooling.
+
+    Given a feature map of shape BCHWN,
+    learn an attention layer which aggregates over
+    the N dimension.
+
+    We do this by learning a query token, and applying a standard
+    attention mechanism against this learned query token.
+    """
+
+    def __init__(self, in_dim: int, num_heads: int, linear_on_kv: bool = True) -> None:
+        """Initialize the attention pooling layer.
+
+        Args:
+            in_dim: the encoding dimension D
+            num_heads: the number of heads to use
+            linear_on_kv: Whether to apply a linear layer on the input tokens
+                to create the key and value tokens.
+        """
+        super().__init__()
+        self.query_token: nn.Parameter = nn.Parameter(torch.empty(in_dim))
+        if linear_on_kv:
+            self.k_linear = nn.Linear(in_dim, in_dim)
+            self.v_linear = nn.Linear(in_dim, in_dim)
+        else:
+            self.k_linear = None
+            self.v_linear = None
+        if in_dim % num_heads != 0:
+            raise ValueError(
+                f"in_dim must be divisible by num_heads. Got {in_dim} and {num_heads}."
+            )
+        self.num_heads = num_heads
+        self.init_weights()
+
+    def init_weights(self) -> None:
+        """Initialize weights for the probe."""
+        nn.init.trunc_normal_(self.query_token, std=0.02)
+
+    def forward_for_map(self, feat_tokens: torch.Tensor) -> torch.Tensor:
+        """Attention pooling for a single feature map (BCHWN tensor)."""
+        B, D, H, W, N = feat_tokens.shape
+        feat_tokens = rearrange(feat_tokens, "b d h w n -> (b h w) n d")
+        collapsed_dim = B * H * W
+        q = self.query_token.expand(collapsed_dim, 1, -1)
+        q = q.reshape(
+            collapsed_dim, 1, self.num_heads, D // self.num_heads
+        )  # [B, 1, head, D_head]
+        q = rearrange(q, "b h n d -> b n h d")
+        if self.k_linear is not None:
+            assert self.v_linear is not None
+            k = self.k_linear(feat_tokens).reshape(
+                collapsed_dim, N, self.num_heads, D // self.num_heads
+            )
+            v = self.v_linear(feat_tokens).reshape(
+                collapsed_dim, N, self.num_heads, D // self.num_heads
+            )
+        else:
+            k = feat_tokens.reshape(
+                collapsed_dim, N, self.num_heads, D // self.num_heads
+            )
+            v = feat_tokens.reshape(
+                collapsed_dim, N, self.num_heads, D // self.num_heads
+            )
+        k = rearrange(k, "b n h d -> b h n d")
+        v = rearrange(v, "b n h d -> b h n d")
+
+        # Compute attention scores
+        attn_scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(
+            D // self.num_heads
+        )
+        attn_weights = F.softmax(attn_scores, dim=-1)
+        x = torch.matmul(attn_weights, v)  # [B, head, 1, D_head]
+        return x.reshape(B, D, H, W)
+
+    def forward(self, intermediates: Any, context: ModelContext) -> FeatureMaps:
+        """Forward pass for attention pooling linear probe.
+
+        Args:
+            intermediates: the output from the previous component, which must be a TokenFeatureMaps.
+                We pool over the final dimension in the TokenFeatureMaps. If multiple feature
+                maps are passed, we apply the same attention weights (query token and linear k, v layers)
+                to all the maps.
+            context: the model context.
+            feat_tokens (torch.Tensor): Input feature tokens of shape (B, C, H, W, N).
+
+        Returns:
+            torch.Tensor:
+                - output, attentioned pool over the last dimension (B, C, H, W)
+        """
+        if not isinstance(intermediates, TokenFeatureMaps):
+            raise ValueError("input to Attention Pool must be a TokenFeatureMaps")
+
+        features = []
+        for feat in intermediates.feature_maps:
+            features.append(self.forward_for_map(feat))
+        return FeatureMaps(features)
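A shape-level usage sketch for the new module (assumes rslearn 0.0.20 is installed; it calls forward_for_map directly to avoid constructing a full ModelContext):

    import torch

    from rslearn.models.attention_pooling import SimpleAttentionPool

    pool = SimpleAttentionPool(in_dim=768)
    tokens = torch.randn(2, 768, 8, 8, 4)   # (B, C, H, W, N)
    pooled = pool.forward_for_map(tokens)
    print(pooled.shape)                     # torch.Size([2, 768, 8, 8]); the N dimension is pooled away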
rslearn/models/clip.py
CHANGED

@@ -43,9 +43,12 @@ class CLIP(FeatureExtractor):
             a FeatureMaps with one feature map from the ViT, which is always Bx24x24x1024.
         """
         inputs = context.inputs
-        device = inputs[0]["image"].device
+        device = inputs[0]["image"].image.device
         clip_inputs = self.processor(
-            images=[
+            images=[
+                inp["image"].single_ts_to_chw_tensor().cpu().numpy().transpose(1, 2, 0)
+                for inp in inputs
+            ],
             return_tensors="pt",
             padding=True,
         )
rslearn/models/component.py
CHANGED

@@ -91,6 +91,18 @@
     feature_maps: list[torch.Tensor]
 
 
+@dataclass
+class TokenFeatureMaps:
+    """An intermediate output type for multi-resolution BCHWN feature maps with a token dimension.
+
+    Unlike `FeatureMaps`, these include an additional dimension for unpooled tokens.
+    """
+
+    # List of BxCxHxWxN feature maps at different scales, ordered from highest resolution
+    # (most fine-grained) to lowest resolution (coarsest).
+    feature_maps: list[torch.Tensor]
+
+
 @dataclass
 class FeatureVector:
     """An intermediate output type for a flat feature vector."""
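A small sketch of the new container next to the existing one (assumes rslearn 0.0.20; names taken from the diff above): TokenFeatureMaps carries BCHWN tensors, which components such as the attention pooling layers reduce to the BCHW FeatureMaps expected downstream.

    import torch

    from rslearn.models.component import FeatureMaps, TokenFeatureMaps

    token_maps = TokenFeatureMaps(feature_maps=[torch.randn(1, 64, 32, 32, 6)])  # BxCxHxWxN
    plain_maps = FeatureMaps(feature_maps=[torch.randn(1, 64, 32, 32)])          # BxCxHxW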
rslearn/models/croma.py
CHANGED

@@ -175,10 +175,16 @@ class Croma(FeatureExtractor):
         sentinel1: torch.Tensor | None = None
         sentinel2: torch.Tensor | None = None
         if self.modality in [CromaModality.BOTH, CromaModality.SENTINEL1]:
-            sentinel1 = torch.stack(
+            sentinel1 = torch.stack(
+                [inp["sentinel1"].single_ts_to_chw_tensor() for inp in context.inputs],
+                dim=0,
+            )
             sentinel1 = self._resize_image(sentinel1) if self.do_resizing else sentinel1
         if self.modality in [CromaModality.BOTH, CromaModality.SENTINEL2]:
-            sentinel2 = torch.stack(
+            sentinel2 = torch.stack(
+                [inp["sentinel2"].single_ts_to_chw_tensor() for inp in context.inputs],
+                dim=0,
+            )
             sentinel2 = self._resize_image(sentinel2) if self.do_resizing else sentinel2
 
         outputs = self.model(
@@ -294,5 +300,7 @@ class CromaNormalize(Transform):
         for modality in MODALITY_BANDS.keys():
             if modality not in input_dict:
                 continue
-            input_dict[modality] = self.apply_image(
+            input_dict[modality].image = self.apply_image(
+                input_dict[modality].image, modality
+            )
         return input_dict, target_dict
rslearn/models/dinov3.py
CHANGED

@@ -104,7 +104,8 @@ class DinoV3(FeatureExtractor):
             a FeatureMaps with one feature map.
         """
         cur = torch.stack(
-            [inp["image"] for inp in context.inputs],
+            [inp["image"].single_ts_to_chw_tensor() for inp in context.inputs],
+            dim=0,
         )  # (B, C, H, W)
 
         if self.do_resizing and (
rslearn/models/faster_rcnn.py
CHANGED

@@ -210,7 +210,8 @@ class FasterRCNN(Predictor):
             ),
         )
 
-
+        # take the first (and assumed to be only) timestep
+        image_list = [inp["image"].image[:, 0] for inp in context.inputs]
         images, targets = self.noop_transform(image_list, targets)
 
         feature_dict = collections.OrderedDict()