rslearn 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rslearn/arg_parser.py +2 -9
- rslearn/config/__init__.py +2 -0
- rslearn/config/dataset.py +64 -20
- rslearn/dataset/add_windows.py +1 -1
- rslearn/dataset/dataset.py +34 -84
- rslearn/dataset/materialize.py +5 -5
- rslearn/dataset/storage/__init__.py +1 -0
- rslearn/dataset/storage/file.py +202 -0
- rslearn/dataset/storage/storage.py +140 -0
- rslearn/dataset/window.py +26 -80
- rslearn/lightning_cli.py +22 -11
- rslearn/main.py +12 -37
- rslearn/models/anysat.py +11 -9
- rslearn/models/attention_pooling.py +177 -0
- rslearn/models/clay/clay.py +8 -9
- rslearn/models/clip.py +18 -15
- rslearn/models/component.py +111 -0
- rslearn/models/concatenate_features.py +21 -11
- rslearn/models/conv.py +15 -8
- rslearn/models/croma.py +13 -8
- rslearn/models/detr/detr.py +25 -14
- rslearn/models/dinov3.py +11 -6
- rslearn/models/faster_rcnn.py +19 -9
- rslearn/models/feature_center_crop.py +12 -9
- rslearn/models/fpn.py +19 -8
- rslearn/models/galileo/galileo.py +23 -18
- rslearn/models/module_wrapper.py +26 -57
- rslearn/models/molmo.py +16 -14
- rslearn/models/multitask.py +102 -73
- rslearn/models/olmoearth_pretrain/model.py +135 -38
- rslearn/models/panopticon.py +8 -7
- rslearn/models/pick_features.py +18 -24
- rslearn/models/pooling_decoder.py +22 -14
- rslearn/models/presto/presto.py +16 -10
- rslearn/models/presto/single_file_presto.py +4 -10
- rslearn/models/prithvi.py +12 -8
- rslearn/models/resize_features.py +21 -7
- rslearn/models/sam2_enc.py +11 -9
- rslearn/models/satlaspretrain.py +15 -9
- rslearn/models/simple_time_series.py +37 -17
- rslearn/models/singletask.py +24 -17
- rslearn/models/ssl4eo_s12.py +15 -10
- rslearn/models/swin.py +22 -13
- rslearn/models/terramind.py +24 -7
- rslearn/models/trunk.py +6 -3
- rslearn/models/unet.py +18 -9
- rslearn/models/upsample.py +22 -9
- rslearn/train/all_patches_dataset.py +89 -37
- rslearn/train/dataset.py +105 -97
- rslearn/train/lightning_module.py +51 -32
- rslearn/train/model_context.py +54 -0
- rslearn/train/prediction_writer.py +111 -41
- rslearn/train/scheduler.py +15 -0
- rslearn/train/tasks/classification.py +34 -15
- rslearn/train/tasks/detection.py +24 -31
- rslearn/train/tasks/embedding.py +33 -29
- rslearn/train/tasks/multi_task.py +7 -7
- rslearn/train/tasks/per_pixel_regression.py +41 -19
- rslearn/train/tasks/regression.py +38 -21
- rslearn/train/tasks/segmentation.py +33 -15
- rslearn/train/tasks/task.py +3 -2
- rslearn/train/transforms/resize.py +74 -0
- rslearn/utils/geometry.py +73 -0
- rslearn/utils/jsonargparse.py +66 -0
- {rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/METADATA +1 -1
- {rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/RECORD +71 -66
- rslearn/dataset/index.py +0 -173
- rslearn/models/registry.py +0 -22
- {rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/WHEEL +0 -0
- {rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/entry_points.txt +0 -0
- {rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/licenses/LICENSE +0 -0
- {rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/licenses/NOTICE +0 -0
- {rslearn-0.0.17.dist-info → rslearn-0.0.19.dist-info}/top_level.txt +0 -0
rslearn/models/ssl4eo_s12.py
CHANGED

@@ -1,12 +1,14 @@
 """SSL4EO-S12 models."""
 
-from typing import Any
-
 import torch
 import torchvision
 
+from rslearn.train.model_context import ModelContext
+
+from .component import FeatureExtractor, FeatureMaps
+
 
-class Ssl4eoS12(torch.nn.Module):
+class Ssl4eoS12(FeatureExtractor):
     """The SSL4EO-S12 family of pretrained models."""
 
     def __init__(

@@ -74,19 +76,22 @@ class Ssl4eoS12(torch.nn.Module):
 
     def forward(
         self,
-
-    ) ->
+        context: ModelContext,
+    ) -> FeatureMaps:
         """Compute outputs from the backbone.
 
         If output_layers is set, then the outputs are multi-scale feature maps;
         otherwise, the model is being used for classification so the outputs are class
         probabilities and the loss.
 
-
-
-            process.
+        Args:
+            context: the model context. Input dicts must include "image" key containing
+                the images to process.
+
+        Returns:
+            feature maps computed by the pre-trained model.
         """
-        x = torch.stack([inp["image"] for inp in inputs], dim=0)
+        x = torch.stack([inp["image"] for inp in context.inputs], dim=0)
         x = self.model.conv1(x)
         x = self.model.bn1(x)
         x = self.model.relu(x)

@@ -97,4 +102,4 @@ class Ssl4eoS12(torch.nn.Module):
         layer3 = self.model.layer3(layer2)
         layer4 = self.model.layer4(layer3)
         all_features = [layer1, layer2, layer3, layer4]
-        return [all_features[idx] for idx in self.output_layers]
+        return FeatureMaps([all_features[idx] for idx in self.output_layers])
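A minimal sketch of the component interface used above (a hypothetical example, not code from the package): it assumes, as the updated Ssl4eoS12.forward suggests, that ModelContext exposes an inputs list of per-sample dicts and that FeatureMaps wraps a list of (B, C, H, W) tensors.

    import torch

    from rslearn.models.component import FeatureExtractor, FeatureMaps
    from rslearn.train.model_context import ModelContext


    class IdentityExtractor(FeatureExtractor):
        """Toy extractor that returns the stacked input images as one feature map."""

        def forward(self, context: ModelContext) -> FeatureMaps:
            # Stack per-sample images into a batch, mirroring Ssl4eoS12.forward above.
            x = torch.stack([inp["image"] for inp in context.inputs], dim=0)
            return FeatureMaps([x])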
rslearn/models/swin.py
CHANGED

@@ -1,7 +1,5 @@
 """Swin Transformer."""
 
-from typing import Any
-
 import torch
 import torchvision
 from torchvision.models.swin_transformer import (

@@ -13,8 +11,12 @@ from torchvision.models.swin_transformer import (
     Swin_V2_T_Weights,
 )
 
+from rslearn.train.model_context import ModelContext
+
+from .component import FeatureExtractor, FeatureMaps, FeatureVector
+
 
-class Swin(torch.nn.Module):
+class Swin(FeatureExtractor):
     """A Swin Transformer model.
 
     It can either be used stand-alone for classification, or as a feature extractor in

@@ -34,9 +36,12 @@ class Swin(torch.nn.Module):
         Args:
             arch: the architecture, e.g. "swin_v2_b" (default) or "swin_t"
             pretrained: set True to use ImageNet pre-trained weights
-            input_channels: number of input channels (default 3)
+            input_channels: number of input channels (default 3). If not 3, the first
+                layer is updated and will be randomly initialized even if pretrained is
+                set.
             output_layers: list of layers to output, default use as classification
-                model. For feature extraction, [1, 3, 5, 7] is
+                model (output FeatureVector). For feature extraction, [1, 3, 5, 7] is
+                recommended.
             num_outputs: number of output logits, defaults to 1000 which matches the
                 pretrained models.
         """

@@ -130,19 +135,23 @@ class Swin(torch.nn.Module):
 
     def forward(
         self,
-
-    ) ->
+        context: ModelContext,
+    ) -> FeatureVector | FeatureMaps:
         """Compute outputs from the backbone.
 
         If output_layers is set, then the outputs are multi-scale feature maps;
         otherwise, the model is being used for classification so the outputs are class
         probabilities and the loss.
 
-
-
-            process.
+        Args:
+            context: the model context. Input dicts must include "image" key containing
+                the image to process.
+
+        Returns:
+            a FeatureVector if the configured output_layers is None, or a FeatureMaps
+                otherwise containing one feature map per configured output layer.
         """
-        images = torch.stack([inp["image"] for inp in inputs], dim=0)
+        images = torch.stack([inp["image"] for inp in context.inputs], dim=0)
 
         if self.output_layers:
             layer_features = []

@@ -150,7 +159,7 @@ class Swin(torch.nn.Module):
             for layer in self.model.features:
                 x = layer(x)
                 layer_features.append(x.permute(0, 3, 1, 2))
-            return [layer_features[idx] for idx in self.output_layers]
+            return FeatureMaps([layer_features[idx] for idx in self.output_layers])
 
         else:
-            return self.model(images)
+            return FeatureVector(self.model(images))
rslearn/models/terramind.py
CHANGED

@@ -8,8 +8,11 @@ import torch.nn.functional as F
 from einops import rearrange
 from terratorch.registry import BACKBONE_REGISTRY
 
+from rslearn.train.model_context import ModelContext
 from rslearn.train.transforms.transform import Transform
 
+from .component import FeatureExtractor, FeatureMaps
+
 
 # TerraMind v1 provides two sizes: base and large
 class TerramindSize(str, Enum):

@@ -85,7 +88,7 @@ PRETRAINED_BANDS = {
 }
 
 
-class Terramind(torch.nn.Module):
+class Terramind(FeatureExtractor):
     """Terramind backbones."""
 
     def __init__(

@@ -123,21 +126,25 @@ class Terramind(torch.nn.Module):
         self.modalities = modalities
         self.do_resizing = do_resizing
 
-    def forward(self,
+    def forward(self, context: ModelContext) -> FeatureMaps:
         """Forward pass for the Terramind model.
 
         Args:
-
+            context: the model context. Input dicts must include modalities as keys
+                which are defined in the self.modalities list.
 
         Returns:
-
+            a FeatureMaps with one feature map from the encoder, at 1/16 of the input
+                resolution.
         """
         model_inputs = {}
         for modality in self.modalities:
             # We assume the all the inputs include the same modalities
-            if modality not in inputs[0]:
+            if modality not in context.inputs[0]:
                 continue
-            cur = torch.stack(
+            cur = torch.stack(
+                [inp[modality] for inp in context.inputs], dim=0
+            )  # (B, C, H, W)
             if self.do_resizing and (
                 cur.shape[2] != IMAGE_SIZE or cur.shape[3] != IMAGE_SIZE
             ):

@@ -159,7 +166,17 @@ class Terramind(torch.nn.Module):
         image_features = self.model(model_inputs)[-1]
         batch_size, num_patches, _ = image_features.shape
         height, width = int(num_patches**0.5), int(num_patches**0.5)
-        return
+        return FeatureMaps(
+            [
+                rearrange(
+                    image_features,
+                    "b (h w) d -> b d h w",
+                    b=batch_size,
+                    h=height,
+                    w=width,
+                )
+            ]
+        )
 
     def get_backbone_channels(self) -> list:
         """Returns the output channels of this model when used as a backbone.
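The new return value reshapes the encoder's token sequence into a spatial map before wrapping it in a FeatureMaps. A small standalone illustration of that reshape with hypothetical shapes (not code from the package):

    # ViT-style tokens of shape (B, N, D) become a (B, D, H, W) map with H = W = sqrt(N).
    import torch
    from einops import rearrange

    tokens = torch.randn(2, 196, 768)  # e.g. a 14x14 patch grid with 768-dim embeddings
    b, n, d = tokens.shape
    h = w = int(n**0.5)
    fmap = rearrange(tokens, "b (h w) d -> b d h w", h=h, w=w)
    assert fmap.shape == (2, 768, 14, 14)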
rslearn/models/trunk.py
CHANGED

@@ -7,6 +7,7 @@ import torch
 
 from rslearn.log_utils import get_logger
 from rslearn.models.task_embedding import BaseTaskEmbedding
+from rslearn.train.model_context import ModelOutput
 
 logger = get_logger(__name__)
 

@@ -32,10 +33,11 @@ class DecoderTrunkLayer(torch.nn.Module, ABC):
             dict with key "outputs" (output tensor of shape (batch_size, seq_len, dim))
             and optionally other keys.
         """
+        raise NotImplementedError
 
     @abstractmethod
     def apply_auxiliary_losses(
-        self, trunk_out: dict[str, Any], outs:
+        self, trunk_out: dict[str, Any], outs: ModelOutput
     ) -> None:
         """Apply auxiliary losses in-place.
 

@@ -43,6 +45,7 @@ class DecoderTrunkLayer(torch.nn.Module, ABC):
             trunk_out: The output of the trunk.
             outs: The output of the decoders, with key "loss_dict" containing the losses.
         """
+        raise NotImplementedError
 
 
 class DecoderTrunk(torch.nn.Module):

@@ -122,7 +125,7 @@ class DecoderTrunk(torch.nn.Module):
         return out
 
     def apply_auxiliary_losses(
-        self, trunk_out: dict[str, Any], outs:
+        self, trunk_out: dict[str, Any], outs: ModelOutput
     ) -> None:
         """Apply auxiliary losses in-place.
 

@@ -130,7 +133,7 @@ class DecoderTrunk(torch.nn.Module):
 
         Args:
             trunk_out: The output of the trunk.
-            outs: The output of the decoders
+            outs: The output of the decoders.
         """
         for layer in self.layers:
             layer.apply_auxiliary_losses(trunk_out, outs)
rslearn/models/unet.py
CHANGED

@@ -5,8 +5,15 @@ from typing import Any
 import torch
 import torch.nn.functional as F
 
+from rslearn.train.model_context import ModelContext
 
-class UNetDecoder(torch.nn.Module):
+from .component import (
+    FeatureMaps,
+    IntermediateComponent,
+)
+
+
+class UNetDecoder(IntermediateComponent):
     """UNet-style decoder.
 
     It inputs multi-scale features. Starting from last (lowest resolution) feature map,

@@ -143,23 +150,25 @@ class UNetDecoder(torch.nn.Module):
                 align_corners=False,
             )
 
-    def forward(
-        self, in_features: list[torch.Tensor], inputs: list[dict[str, Any]]
-    ) -> torch.Tensor:
+    def forward(self, intermediates: Any, context: ModelContext) -> FeatureMaps:
         """Compute output from multi-scale feature map.
 
         Args:
-
-
+            intermediates: the output from the previous model component, which must be a FeatureMaps.
+            context: the model context.
 
         Returns:
-            output
+            output FeatureMaps consisting of one map. The embedding size is equal to the
+                configured out_channels.
         """
+        if not isinstance(intermediates, FeatureMaps):
+            raise ValueError("input to UNetDecoder must be a FeatureMaps")
+
         # Reverse the features since we will pass them in from lowest resolution to highest.
-        in_features = list(reversed(
+        in_features = list(reversed(intermediates.feature_maps))
         cur_features = self.layers[0](in_features[0])
         for in_feat, layer in zip(in_features[1:], self.layers[1:]):
             cur_features = layer(torch.cat([cur_features, in_feat], dim=1))
         if self.original_size_to_interpolate is not None:
             cur_features = self._resize(cur_features)
-        return cur_features
+        return FeatureMaps([cur_features])
rslearn/models/upsample.py
CHANGED

@@ -1,9 +1,18 @@
 """An upsampling layer."""
 
+from typing import Any
+
 import torch
 
+from rslearn.train.model_context import ModelContext
+
+from .component import (
+    FeatureMaps,
+    IntermediateComponent,
+)
 
-class Upsample(torch.nn.Module):
+
+class Upsample(IntermediateComponent):
     """Upsamples each input feature map by the same factor."""
 
     def __init__(

@@ -20,16 +29,20 @@ class Upsample(torch.nn.Module):
         super().__init__()
         self.layer = torch.nn.Upsample(scale_factor=scale_factor, mode=mode)
 
-    def forward(
-
-    ) -> list[torch.Tensor]:
-        """Upsample each feature map.
+    def forward(self, intermediates: Any, context: ModelContext) -> FeatureMaps:
+        """Upsample each feature map by scale_factor.
 
         Args:
-
-
+            intermediates: the output from the previous component, which must be a FeatureMaps.
+            context: the model context.
 
         Returns:
-            upsampled feature maps
+            upsampled feature maps.
         """
-
+        if not isinstance(intermediates, FeatureMaps):
+            raise ValueError("input to Upsample must be a FeatureMaps")
+
+        upsampled_feat_maps = [
+            self.layer(feat_map) for feat_map in intermediates.feature_maps
+        ]
+        return FeatureMaps(upsampled_feat_maps)
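Both UNetDecoder and Upsample now follow the IntermediateComponent pattern: forward receives the previous component's output plus the ModelContext, validates that it is a FeatureMaps, and returns a new FeatureMaps. A minimal sketch of that pattern with a hypothetical component (not part of the package):

    from typing import Any

    from rslearn.models.component import FeatureMaps, IntermediateComponent
    from rslearn.train.model_context import ModelContext


    class ScaleFeatures(IntermediateComponent):
        """Toy component that multiplies every feature map by a constant factor."""

        def __init__(self, factor: float = 2.0):
            super().__init__()
            self.factor = factor

        def forward(self, intermediates: Any, context: ModelContext) -> FeatureMaps:
            if not isinstance(intermediates, FeatureMaps):
                raise ValueError("input to ScaleFeatures must be a FeatureMaps")
            return FeatureMaps([self.factor * f for f in intermediates.feature_maps])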
rslearn/train/all_patches_dataset.py
CHANGED

@@ -2,13 +2,15 @@
 
 import itertools
 from collections.abc import Iterable, Iterator
+from dataclasses import replace
 from typing import Any
 
 import shapely
 import torch
 
 from rslearn.dataset import Window
-from rslearn.train.dataset import ModelDataset
+from rslearn.train.dataset import DataInput, ModelDataset
+from rslearn.train.model_context import SampleMetadata
 from rslearn.utils.geometry import PixelBounds, STGeometry
 
 
@@ -32,22 +34,28 @@ def get_window_patch_options(
         bottommost patches may extend beyond the provided bounds.
     """
     # We stride the patches by patch_size - overlap_size until the last patch.
+    # We handle the first patch with a special case to ensure it is always used.
     # We handle the last patch with a special case to ensure it does not exceed the
     # window bounds. Instead, it may overlap the previous patch.
-    cols = list(
+    cols = [bounds[0]] + list(
         range(
-            bounds[0],
+            bounds[0] + patch_size[0],
             bounds[2] - patch_size[0],
             patch_size[0] - overlap_size[0],
         )
-    )
-    rows = list(
+    )
+    rows = [bounds[1]] + list(
         range(
-            bounds[1],
+            bounds[1] + patch_size[1],
             bounds[3] - patch_size[1],
             patch_size[1] - overlap_size[1],
         )
-    )
+    )
+    # Add last patches only if the input is larger than one patch.
+    if bounds[2] - patch_size[0] > bounds[0]:
+        cols.append(bounds[2] - patch_size[0])
+    if bounds[3] - patch_size[1] > bounds[1]:
+        rows.append(bounds[3] - patch_size[1])
 
     patch_bounds: list[PixelBounds] = []
     for col in cols:
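A worked example of the updated tiling with hypothetical numbers: for bounds (0, 0, 1000, 1000), patch_size (256, 256), and no overlap, columns start at 0, stride by 256 while staying strictly below bounds[2] - patch_size[0] = 744, and a final column at 744 is appended so the last patch ends exactly at the window edge.

    bounds = (0, 0, 1000, 1000)
    patch_size = (256, 256)
    overlap_size = (0, 0)

    cols = [bounds[0]] + list(
        range(
            bounds[0] + patch_size[0],
            bounds[2] - patch_size[0],
            patch_size[0] - overlap_size[0],
        )
    )
    if bounds[2] - patch_size[0] > bounds[0]:
        cols.append(bounds[2] - patch_size[0])
    print(cols)  # [0, 256, 512, 744]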
@@ -60,13 +68,17 @@ def pad_slice_protect(
     raw_inputs: dict[str, Any],
     passthrough_inputs: dict[str, Any],
     patch_size: tuple[int, int],
+    inputs: dict[str, DataInput],
 ) -> tuple[dict[str, Any], dict[str, Any]]:
     """Pad tensors in-place by patch size to protect slicing near right/bottom edges.
 
+    The padding is scaled based on each input's resolution_factor.
+
     Args:
         raw_inputs: the raw inputs to pad.
         passthrough_inputs: the passthrough inputs to pad.
-        patch_size: the size of the patches to extract.
+        patch_size: the size of the patches to extract (at window resolution).
+        inputs: the DataInput definitions, used to get resolution_factor per input.
 
     Returns:
         a tuple of (raw_inputs, passthrough_inputs).

@@ -75,8 +87,14 @@ def pad_slice_protect(
         for input_name, value in list(d.items()):
             if not isinstance(value, torch.Tensor):
                 continue
+            # Get resolution scale for this input
+            rf = inputs[input_name].resolution_factor
+            scale = rf.numerator / rf.denominator
+            # Scale the padding amount
+            scaled_pad_x = int(patch_size[0] * scale)
+            scaled_pad_y = int(patch_size[1] * scale)
             d[input_name] = torch.nn.functional.pad(
-                value, pad=(0,
+                value, pad=(0, scaled_pad_x, 0, scaled_pad_y)
             )
     return raw_inputs, passthrough_inputs
 
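The scaling added above divides resolution_factor's numerator by its denominator, so it behaves like a fraction relating each input's stored resolution to the window resolution. A small illustration with hypothetical values, assuming resolution_factor behaves like fractions.Fraction (the actual DataInput definition lives in rslearn/train/dataset.py):

    from fractions import Fraction

    patch_size = (128, 128)
    resolution_factor = Fraction(1, 2)  # input stored at half the window resolution

    scale = resolution_factor.numerator / resolution_factor.denominator
    scaled_pad = (int(patch_size[0] * scale), int(patch_size[1] * scale))
    print(scaled_pad)  # (64, 64)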
@@ -121,6 +139,7 @@ class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
         self.rank = rank
         self.world_size = world_size
         self.windows = self.dataset.get_dataset_examples()
+        self.inputs = dataset.inputs
 
     def set_name(self, name: str) -> None:
         """Sets dataset name.

@@ -218,7 +237,7 @@ class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
 
     def __iter__(
         self,
-    ) -> Iterator[tuple[dict[str, Any], dict[str, Any],
+    ) -> Iterator[tuple[dict[str, Any], dict[str, Any], SampleMetadata]]:
         """Iterate over all patches in each element of the underlying ModelDataset."""
         # Iterate over the window IDs until we have returned enough samples.
         window_ids, num_samples_needed = self._get_worker_iteration_data()

@@ -229,12 +248,14 @@ class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
             raw_inputs, passthrough_inputs, metadata = self.dataset.get_raw_inputs(
                 window_id
             )
-            bounds = metadata
+            bounds = metadata.patch_bounds
 
             # For simplicity, pad tensors by patch size to ensure that any patch bounds
             # extending outside the window bounds will not have issues when we slice
-            # the tensors later.
-            pad_slice_protect(
+            # the tensors later. Padding is scaled per-input based on resolution_factor.
+            pad_slice_protect(
+                raw_inputs, passthrough_inputs, self.patch_size, self.inputs
+            )
 
             # Now iterate over the patches and extract/yield the crops.
             # Note that, in case user is leveraging RslearnWriter, it is important that

@@ -244,7 +265,7 @@ class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
             )
             for patch_idx, patch_bounds in enumerate(patches):
                 cur_geom = STGeometry(
-                    metadata
+                    metadata.projection, shapely.box(*patch_bounds), None
                 )
                 start_offset = (
                     patch_bounds[0] - bounds[0],

@@ -256,15 +277,28 @@ class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
                 )
 
                 # Define a helper function to handle each input dict.
+                # Crop coordinates are scaled based on each input's resolution_factor.
                 def crop_input_dict(d: dict[str, Any]) -> dict[str, Any]:
                     cropped = {}
                     for input_name, value in d.items():
                         if isinstance(value, torch.Tensor):
-                            #
+                            # Get resolution scale for this input
+                            rf = self.inputs[input_name].resolution_factor
+                            scale = rf.numerator / rf.denominator
+                            # Scale the crop coordinates
+                            scaled_start = (
+                                int(start_offset[0] * scale),
+                                int(start_offset[1] * scale),
+                            )
+                            scaled_end = (
+                                int(end_offset[0] * scale),
+                                int(end_offset[1] * scale),
+                            )
+                            # Crop the CHW tensor with scaled coordinates.
                             cropped[input_name] = value[
                                 :,
-
-
+                                scaled_start[1] : scaled_end[1],
+                                scaled_start[0] : scaled_end[0],
                             ].clone()
                         elif isinstance(value, list):
                             cropped[input_name] = [

@@ -282,10 +316,12 @@ class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
                 cur_passthrough_inputs = crop_input_dict(passthrough_inputs)
 
                 # Adjust the metadata as well.
-                cur_metadata =
-
-
-
+                cur_metadata = replace(
+                    metadata,
+                    patch_bounds=patch_bounds,
+                    patch_idx=patch_idx,
+                    num_patches_in_window=len(patches),
+                )
 
                 # Now we can compute input and target dicts via the task.
                 input_dict, target_dict = self.dataset.task.process_inputs(

@@ -297,7 +333,6 @@ class IterableAllPatchesDataset(torch.utils.data.IterableDataset):
                 input_dict, target_dict = self.dataset.transforms(
                     input_dict, target_dict
                 )
-                input_dict["dataset_source"] = self.dataset.name
 
                 if num_samples_returned < num_samples_needed:
                     yield input_dict, target_dict, cur_metadata
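The per-patch metadata above is derived with dataclasses.replace, which copies a dataclass instance and overrides only the named fields. A short sketch with a hypothetical stand-in dataclass (the real SampleMetadata is defined in rslearn/train/model_context.py):

    from dataclasses import dataclass, replace


    @dataclass(frozen=True)
    class PatchMeta:  # hypothetical stand-in for SampleMetadata
        patch_bounds: tuple[int, int, int, int]
        patch_idx: int
        num_patches_in_window: int


    window_meta = PatchMeta((0, 0, 1000, 1000), patch_idx=0, num_patches_in_window=1)
    patch_meta = replace(
        window_meta,
        patch_bounds=(0, 0, 256, 256),
        patch_idx=3,
        num_patches_in_window=16,
    )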
@@ -345,8 +380,9 @@ class InMemoryAllPatchesDataset(torch.utils.data.Dataset):
             round(self.patch_size[1] * overlap_ratio),
         )
         self.windows = self.dataset.get_dataset_examples()
+        self.inputs = dataset.inputs
         self.window_cache: dict[
-            int, tuple[dict[str, Any], dict[str, Any],
+            int, tuple[dict[str, Any], dict[str, Any], SampleMetadata]
         ] = {}
 
         # Precompute the batch boundaries for each window

@@ -360,7 +396,7 @@ class InMemoryAllPatchesDataset(torch.utils.data.Dataset):
 
     def get_raw_inputs(
         self, index: int
-    ) -> tuple[dict[str, Any], dict[str, Any],
+    ) -> tuple[dict[str, Any], dict[str, Any], SampleMetadata]:
         """Get the raw inputs for a single patch. Retrieve from cache if possible.
 
         Also crops/pads the tensors by patch size to protect slicing near right/bottom edges.

@@ -375,26 +411,41 @@ class InMemoryAllPatchesDataset(torch.utils.data.Dataset):
             return self.window_cache[index]
 
         raw_inputs, passthrough_inputs, metadata = self.dataset.get_raw_inputs(index)
-        pad_slice_protect(raw_inputs, passthrough_inputs, self.patch_size)
+        pad_slice_protect(raw_inputs, passthrough_inputs, self.patch_size, self.inputs)
 
         self.window_cache[index] = (raw_inputs, passthrough_inputs, metadata)
         return self.window_cache[index]
 
-    @staticmethod
     def _crop_input_dict(
+        self,
         d: dict[str, Any],
         start_offset: tuple[int, int],
         end_offset: tuple[int, int],
         cur_geom: STGeometry,
     ) -> dict[str, Any]:
-        """Crop a dictionary of inputs to the given bounds.
+        """Crop a dictionary of inputs to the given bounds.
+
+        Crop coordinates are scaled based on each input's resolution_factor.
+        """
         cropped = {}
         for input_name, value in d.items():
             if isinstance(value, torch.Tensor):
+                # Get resolution scale for this input
+                rf = self.inputs[input_name].resolution_factor
+                scale = rf.numerator / rf.denominator
+                # Scale the crop coordinates
+                scaled_start = (
+                    int(start_offset[0] * scale),
+                    int(start_offset[1] * scale),
+                )
+                scaled_end = (
+                    int(end_offset[0] * scale),
+                    int(end_offset[1] * scale),
+                )
                 cropped[input_name] = value[
                     :,
-
-
+                    scaled_start[1] : scaled_end[1],
+                    scaled_start[0] : scaled_end[0],
                 ].clone()
             elif isinstance(value, list):
                 cropped[input_name] = [

@@ -410,13 +461,13 @@ class InMemoryAllPatchesDataset(torch.utils.data.Dataset):
 
     def __getitem__(
        self, index: int
-    ) -> tuple[dict[str, Any], dict[str, Any],
+    ) -> tuple[dict[str, Any], dict[str, Any], SampleMetadata]:
         """Return (input_dict, target_dict, metadata) for a single flattened patch."""
         (window_id, patch_bounds, (patch_idx, num_patches)) = self.patches[index]
         raw_inputs, passthrough_inputs, metadata = self.get_raw_inputs(window_id)
-        bounds = metadata
+        bounds = metadata.patch_bounds
 
-        cur_geom = STGeometry(metadata
+        cur_geom = STGeometry(metadata.projection, shapely.box(*patch_bounds), None)
         start_offset = (patch_bounds[0] - bounds[0], patch_bounds[1] - bounds[1])
         end_offset = (patch_bounds[2] - bounds[0], patch_bounds[3] - bounds[1])
 

@@ -428,10 +479,12 @@ class InMemoryAllPatchesDataset(torch.utils.data.Dataset):
         )
 
         # Adjust the metadata as well.
-        cur_metadata =
-
-
-
+        cur_metadata = replace(
+            metadata,
+            patch_bounds=patch_bounds,
+            patch_idx=patch_idx,
+            num_patches_in_window=num_patches,
+        )
 
         # Now we can compute input and target dicts via the task.
         input_dict, target_dict = self.dataset.task.process_inputs(

@@ -441,7 +494,6 @@ class InMemoryAllPatchesDataset(torch.utils.data.Dataset):
         )
         input_dict.update(cur_passthrough_inputs)
         input_dict, target_dict = self.dataset.transforms(input_dict, target_dict)
-        input_dict["dataset_source"] = self.dataset.name
 
         return input_dict, target_dict, cur_metadata
 