rslearn 0.0.25__py3-none-any.whl → 0.0.27__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (72)
  1. rslearn/config/dataset.py +30 -23
  2. rslearn/data_sources/__init__.py +2 -0
  3. rslearn/data_sources/aws_landsat.py +44 -161
  4. rslearn/data_sources/aws_open_data.py +2 -4
  5. rslearn/data_sources/aws_sentinel1.py +1 -3
  6. rslearn/data_sources/aws_sentinel2_element84.py +54 -165
  7. rslearn/data_sources/climate_data_store.py +1 -3
  8. rslearn/data_sources/copernicus.py +1 -2
  9. rslearn/data_sources/data_source.py +1 -1
  10. rslearn/data_sources/direct_materialize_data_source.py +336 -0
  11. rslearn/data_sources/earthdaily.py +52 -155
  12. rslearn/data_sources/earthdatahub.py +425 -0
  13. rslearn/data_sources/eurocrops.py +1 -2
  14. rslearn/data_sources/gcp_public_data.py +1 -2
  15. rslearn/data_sources/google_earth_engine.py +1 -2
  16. rslearn/data_sources/hf_srtm.py +595 -0
  17. rslearn/data_sources/local_files.py +3 -3
  18. rslearn/data_sources/openstreetmap.py +1 -1
  19. rslearn/data_sources/planet.py +1 -2
  20. rslearn/data_sources/planet_basemap.py +1 -2
  21. rslearn/data_sources/planetary_computer.py +183 -186
  22. rslearn/data_sources/soilgrids.py +3 -3
  23. rslearn/data_sources/stac.py +1 -2
  24. rslearn/data_sources/usda_cdl.py +1 -3
  25. rslearn/data_sources/usgs_landsat.py +7 -254
  26. rslearn/data_sources/utils.py +204 -64
  27. rslearn/data_sources/worldcereal.py +1 -1
  28. rslearn/data_sources/worldcover.py +1 -1
  29. rslearn/data_sources/worldpop.py +1 -1
  30. rslearn/data_sources/xyz_tiles.py +5 -9
  31. rslearn/dataset/materialize.py +5 -1
  32. rslearn/models/clay/clay.py +3 -3
  33. rslearn/models/concatenate_features.py +6 -1
  34. rslearn/models/detr/detr.py +4 -1
  35. rslearn/models/dinov3.py +0 -1
  36. rslearn/models/olmoearth_pretrain/model.py +3 -1
  37. rslearn/models/pooling_decoder.py +1 -1
  38. rslearn/models/prithvi.py +0 -1
  39. rslearn/models/simple_time_series.py +97 -35
  40. rslearn/train/{all_patches_dataset.py → all_crops_dataset.py} +120 -117
  41. rslearn/train/data_module.py +32 -27
  42. rslearn/train/dataset.py +260 -117
  43. rslearn/train/dataset_index.py +156 -0
  44. rslearn/train/lightning_module.py +1 -1
  45. rslearn/train/model_context.py +19 -3
  46. rslearn/train/prediction_writer.py +69 -41
  47. rslearn/train/tasks/classification.py +1 -1
  48. rslearn/train/tasks/detection.py +5 -5
  49. rslearn/train/tasks/per_pixel_regression.py +13 -13
  50. rslearn/train/tasks/regression.py +1 -1
  51. rslearn/train/tasks/segmentation.py +26 -13
  52. rslearn/train/transforms/concatenate.py +17 -27
  53. rslearn/train/transforms/crop.py +8 -19
  54. rslearn/train/transforms/flip.py +4 -10
  55. rslearn/train/transforms/mask.py +9 -15
  56. rslearn/train/transforms/normalize.py +31 -82
  57. rslearn/train/transforms/pad.py +7 -13
  58. rslearn/train/transforms/resize.py +5 -22
  59. rslearn/train/transforms/select_bands.py +16 -36
  60. rslearn/train/transforms/sentinel1.py +4 -16
  61. rslearn/utils/__init__.py +2 -0
  62. rslearn/utils/geometry.py +21 -0
  63. rslearn/utils/m2m_api.py +251 -0
  64. rslearn/utils/retry_session.py +43 -0
  65. {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/METADATA +6 -3
  66. {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/RECORD +71 -66
  67. rslearn/data_sources/earthdata_srtm.py +0 -282
  68. {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/WHEEL +0 -0
  69. {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/entry_points.txt +0 -0
  70. {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/licenses/LICENSE +0 -0
  71. {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/licenses/NOTICE +0 -0
  72. {rslearn-0.0.25.dist-info → rslearn-0.0.27.dist-info}/top_level.txt +0 -0

rslearn/train/lightning_module.py
@@ -365,7 +365,7 @@ class RslearnLightningModule(L.LightningModule):
         for image_suffix, image in images.items():
             out_fname = os.path.join(
                 self.visualize_dir,
-                f"{metadata.window_name}_{metadata.patch_bounds[0]}_{metadata.patch_bounds[1]}_{image_suffix}.png",
+                f"{metadata.window_name}_{metadata.crop_bounds[0]}_{metadata.crop_bounds[1]}_{image_suffix}.png",
             )
             Image.fromarray(image).save(out_fname)
 

rslearn/train/model_context.py
@@ -43,6 +43,22 @@ class RasterImage:
             raise ValueError(f"Expected a single timestep, got {self.image.shape[1]}")
         return self.image[:, 0]
 
+    def get_hw_tensor(self) -> torch.Tensor:
+        """Get a 2D HW tensor from a single-channel, single-timestep RasterImage.
+
+        This function checks that C=1 and T=1, then returns the HW tensor.
+        Useful for per-pixel labels like segmentation masks.
+        """
+        if self.image.shape[0] != 1:
+            raise ValueError(
+                f"Expected single channel (C=1), got {self.image.shape[0]}"
+            )
+        if self.image.shape[1] != 1:
+            raise ValueError(
+                f"Expected single timestep (T=1), got {self.image.shape[1]}"
+            )
+        return self.image[0, 0]
+
 
 @dataclass
 class SampleMetadata:
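
The new helper collapses RasterImage's CTHW layout to a plain HW tensor when both the channel and time axes are singletons. A minimal usage sketch (the module path is inferred from the file list; the constructor call appears elsewhere in this diff):

    import torch
    from rslearn.train.model_context import RasterImage  # path assumed

    # A single-band segmentation mask in CTHW layout (C=1, T=1, H=64, W=64).
    mask = RasterImage(torch.zeros((1, 1, 64, 64), dtype=torch.long), timestamps=None)
    labels = mask.get_hw_tensor()  # 2D tensor of shape (64, 64)
    # Any C > 1 or T > 1 raises ValueError rather than silently squeezing.
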
@@ -51,9 +67,9 @@ class SampleMetadata:
     window_group: str
     window_name: str
     window_bounds: PixelBounds
-    patch_bounds: PixelBounds
-    patch_idx: int
-    num_patches_in_window: int
+    crop_bounds: PixelBounds
+    crop_idx: int
+    num_crops_in_window: int
     time_range: tuple[datetime, datetime] | None
     projection: Projection
 

rslearn/train/prediction_writer.py
@@ -1,6 +1,7 @@
 """rslearn PredictionWriter implementation."""
 
 import json
+import warnings
 from collections.abc import Iterable, Sequence
 from dataclasses import dataclass
 from pathlib import Path
@@ -39,20 +40,20 @@ logger = get_logger(__name__)
 
 
 @dataclass
-class PendingPatchOutput:
-    """A patch output that hasn't been merged yet."""
+class PendingCropOutput:
+    """A crop output that hasn't been merged yet."""
 
     bounds: PixelBounds
     output: Any
 
 
-class PatchPredictionMerger:
-    """Base class for merging predictions from multiple patches."""
+class CropPredictionMerger:
+    """Base class for merging predictions from multiple crops."""
 
     def merge(
         self,
         window: Window,
-        outputs: Sequence[PendingPatchOutput],
+        outputs: Sequence[PendingCropOutput],
         layer_config: LayerConfig,
     ) -> Any:
         """Merge the outputs.
@@ -68,39 +69,60 @@ class PatchPredictionMerger:
         raise NotImplementedError
 
 
-class VectorMerger(PatchPredictionMerger):
+class VectorMerger(CropPredictionMerger):
     """Merger for vector data that simply concatenates the features."""
 
     def merge(
         self,
         window: Window,
-        outputs: Sequence[PendingPatchOutput],
+        outputs: Sequence[PendingCropOutput],
         layer_config: LayerConfig,
     ) -> list[Feature]:
         """Concatenate the vector features."""
         return [feat for output in outputs for feat in output.output]
 
 
-class RasterMerger(PatchPredictionMerger):
+class RasterMerger(CropPredictionMerger):
     """Merger for raster data that copies the rasters to the output."""
 
-    def __init__(self, padding: int | None = None, downsample_factor: int = 1):
+    def __init__(
+        self,
+        overlap_pixels: int | None = None,
+        downsample_factor: int = 1,
+        # Deprecated parameter (for backwards compatibility)
+        padding: int | None = None,
+    ):
         """Create a new RasterMerger.
 
         Args:
-            padding: the padding around the individual patch outputs to remove. This is
-                typically used when leveraging overlapping patches. Portions of outputs
-                at the border of the window will still be retained.
+            overlap_pixels: the number of pixels shared between adjacent crops during
+                sliding window inference. Half of this overlap is removed from each
+                crop during merging (except at window boundaries where the full crop
+                is retained).
             downsample_factor: the factor by which the rasters output by the task are
                 lower in resolution relative to the window resolution.
+            padding: deprecated, use overlap_pixels instead. The old padding value
+                equals overlap_pixels // 2.
         """
-        self.padding = padding
+        # Handle deprecated padding parameter
+        if padding is not None:
+            warnings.warn(
+                "padding is deprecated, use overlap_pixels instead. "
+                "Note: overlap_pixels = padding * 2",
+                FutureWarning,
+                stacklevel=2,
+            )
+            if overlap_pixels is not None:
+                raise ValueError("Cannot specify both padding and overlap_pixels")
+            overlap_pixels = padding * 2
+
+        self.overlap_pixels = overlap_pixels
         self.downsample_factor = downsample_factor
 
     def merge(
         self,
         window: Window,
-        outputs: Sequence[PendingPatchOutput],
+        outputs: Sequence[PendingCropOutput],
         layer_config: LayerConfig,
     ) -> npt.NDArray:
         """Merge the raster outputs."""
@@ -114,6 +136,12 @@ class RasterMerger(PatchPredictionMerger):
             dtype=layer_config.band_sets[0].dtype.get_numpy_dtype(),
         )
 
+        # Compute how many pixels to trim from each side.
+        # We remove half of the overlap from each side (not at window boundaries).
+        trim_pixels = (
+            self.overlap_pixels // 2 if self.overlap_pixels is not None else None
+        )
+
         # Ensure the outputs are sorted by height then width.
         # This way when we merge we can be sure that outputs that are lower or further
         # to the right will overwrite earlier outputs.
@@ -123,18 +151,18 @@
         for output in sorted_outputs:
             # So now we just need to compute the src_offset to copy.
             # If the output is not on the left or top boundary, then we should apply
-            # the padding (if set).
+            # the trim (if set).
             src = output.output
             src_offset = (
                 output.bounds[0] // self.downsample_factor,
                 output.bounds[1] // self.downsample_factor,
             )
-            if self.padding is not None and output.bounds[0] != window.bounds[0]:
-                src = src[:, :, self.padding :]
-                src_offset = (src_offset[0] + self.padding, src_offset[1])
-            if self.padding is not None and output.bounds[1] != window.bounds[1]:
-                src = src[:, self.padding :, :]
-                src_offset = (src_offset[0], src_offset[1] + self.padding)
+            if trim_pixels is not None and output.bounds[0] != window.bounds[0]:
+                src = src[:, :, trim_pixels:]
+                src_offset = (src_offset[0] + trim_pixels, src_offset[1])
+            if trim_pixels is not None and output.bounds[1] != window.bounds[1]:
+                src = src[:, trim_pixels:, :]
+                src_offset = (src_offset[0], src_offset[1] + trim_pixels)
 
             copy_spatial_array(
                 src=src,
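
A worked example of the trimming arithmetic, under assumed numbers:

    # Assume 256x256 crops with overlap_pixels=64 (stride 192) in a window whose
    # bounds start at x=0, so trim_pixels = 64 // 2 = 32.
    #
    # Crop at x=0:   on the window boundary, kept in full -> covers [0, 256).
    # Crop at x=192: interior, 32 px trimmed on the left  -> covers [224, 448).
    #
    # The later crop overwrites [224, 256), so each seam falls 32 px inside both
    # crops, away from the least reliable border predictions.
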
@@ -162,7 +190,7 @@ class RslearnWriter(BasePredictionWriter):
         output_layer: str,
         path_options: dict[str, Any] | None = None,
         selector: list[str] | None = None,
-        merger: PatchPredictionMerger | None = None,
+        merger: CropPredictionMerger | None = None,
         output_path: str | Path | None = None,
         layer_config: LayerConfig | None = None,
         storage_config: StorageConfig | None = None,
@@ -175,7 +203,7 @@
             path_options: additional options for path to pass to fsspec
             selector: keys to access the desired output in the output dict if needed.
                 e.g ["key1", "key2"] gets output["key1"]["key2"]
-            merger: merger to use to merge outputs from overlapped patches.
+            merger: merger to use to merge outputs from overlapped crops.
             output_path: optional custom path for writing predictions. If provided,
                 predictions will be written to this path instead of deriving from dataset path.
             layer_config: optional layer configuration. If provided, this config will be
@@ -217,9 +245,9 @@
             self.merger = VectorMerger()
 
         # Map from window name to pending data to write.
-        # This is used when windows are split up into patches, so the data from all the
-        # patches of each window need to be reconstituted.
-        self.pending_outputs: dict[str, list[PendingPatchOutput]] = {}
+        # This is used when windows are split up into crops, so the data from all the
+        # crops of each window need to be reconstituted.
+        self.pending_outputs: dict[str, list[PendingCropOutput]] = {}
 
     def _get_layer_config_and_dataset_storage(
         self,
@@ -327,7 +355,7 @@
                 will be processed by the task to obtain a vector (list[Feature]) or
                 raster (npt.NDArray) output.
             metadatas: corresponding list of metadatas from the batch describing the
-                patches that were processed.
+                crops that were processed.
         """
         # Process the predictions into outputs that can be written.
         outputs: list = [
@@ -349,17 +377,17 @@
             )
             self.process_output(
                 window,
-                metadata.patch_idx,
-                metadata.num_patches_in_window,
-                metadata.patch_bounds,
+                metadata.crop_idx,
+                metadata.num_crops_in_window,
+                metadata.crop_bounds,
                 output,
             )
 
     def process_output(
         self,
         window: Window,
-        patch_idx: int,
-        num_patches: int,
+        crop_idx: int,
+        num_crops: int,
         cur_bounds: PixelBounds,
         output: npt.NDArray | list[Feature],
     ) -> None:
@@ -367,28 +395,28 @@
 
         Args:
             window: the window that the output pertains to.
-            patch_idx: the index of this patch for the window.
-            num_patches: the total number of patches to be processed for the window.
-            cur_bounds: the bounds of the current patch.
+            crop_idx: the index of this crop for the window.
+            num_crops: the total number of crops to be processed for the window.
+            cur_bounds: the bounds of the current crop.
             output: the output data.
         """
-        # Incorporate the output into our list of pending patch outputs.
+        # Incorporate the output into our list of pending crop outputs.
         if window.name not in self.pending_outputs:
             self.pending_outputs[window.name] = []
-        self.pending_outputs[window.name].append(PendingPatchOutput(cur_bounds, output))
+        self.pending_outputs[window.name].append(PendingCropOutput(cur_bounds, output))
         logger.debug(
-            f"Stored PendingPatchOutput for patch #{patch_idx}/{num_patches} at window {window.name}"
+            f"Stored PendingCropOutput for crop #{crop_idx}/{num_crops} at window {window.name}"
         )
 
-        if patch_idx < num_patches - 1:
+        if crop_idx < num_crops - 1:
             return
 
-        # This is the last patch so it's time to write it.
+        # This is the last crop so it's time to write it.
         # First get the pending output and clear it.
         pending_output = self.pending_outputs[window.name]
         del self.pending_outputs[window.name]
 
-        # Merge outputs from overlapped patches if merger is set.
+        # Merge outputs from overlapped crops if merger is set.
         logger.debug(f"Merging and writing for window {window.name}")
         merged_output = self.merger.merge(window, pending_output, self.layer_config)

rslearn/train/tasks/classification.py
@@ -201,7 +201,7 @@ class ClassificationTask(BasicTask):
         feature = Feature(
             STGeometry(
                 metadata.projection,
-                shapely.Point(metadata.patch_bounds[0], metadata.patch_bounds[1]),
+                shapely.Point(metadata.crop_bounds[0], metadata.crop_bounds[1]),
                 None,
             ),
             {

rslearn/train/tasks/detection.py
@@ -128,7 +128,7 @@ class DetectionTask(BasicTask):
         if not load_targets:
             return {}, {}
 
-        bounds = metadata.patch_bounds
+        bounds = metadata.crop_bounds
 
         boxes = []
         class_labels = []
@@ -244,10 +244,10 @@
         features = []
         for box, class_id, score in zip(boxes, class_ids, scores):
             shp = shapely.box(
-                metadata.patch_bounds[0] + float(box[0]),
-                metadata.patch_bounds[1] + float(box[1]),
-                metadata.patch_bounds[0] + float(box[2]),
-                metadata.patch_bounds[1] + float(box[3]),
+                metadata.crop_bounds[0] + float(box[0]),
+                metadata.crop_bounds[1] + float(box[1]),
+                metadata.crop_bounds[0] + float(box[2]),
+                metadata.crop_bounds[1] + float(box[3]),
             )
             geom = STGeometry(metadata.projection, shp, None)
             properties: dict[str, Any] = {

rslearn/train/tasks/per_pixel_regression.py
@@ -66,20 +66,18 @@ class PerPixelRegressionTask(BasicTask):
             return {}, {}
 
         assert isinstance(raw_inputs["targets"], RasterImage)
-        assert raw_inputs["targets"].image.shape[0] == 1
-        assert raw_inputs["targets"].image.shape[1] == 1
-        labels = raw_inputs["targets"].image[0, 0, :, :].float() * self.scale_factor
+        labels = raw_inputs["targets"].get_hw_tensor().float() * self.scale_factor
 
         if self.nodata_value is not None:
-            valid = (
-                raw_inputs["targets"].image[0, 0, :, :] != self.nodata_value
-            ).float()
+            valid = (raw_inputs["targets"].get_hw_tensor() != self.nodata_value).float()
         else:
             valid = torch.ones(labels.shape, dtype=torch.float32)
 
+        # Wrap in RasterImage with CTHW format (C=1, T=1) so values and valid can be
+        # used in image transforms.
         return {}, {
-            "values": labels,
-            "valid": valid,
+            "values": RasterImage(labels[None, None, :, :], timestamps=None),
+            "valid": RasterImage(valid[None, None, :, :], timestamps=None),
         }
 
     def process_output(
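
The downstream effect is that regression targets, which were plain HW tensors in 0.0.25, now arrive wrapped as RasterImage, and heads and metrics unwrap them with get_hw_tensor() (see the hunks below). A sketch with illustrative shapes (import path assumed):

    import torch
    from rslearn.train.model_context import RasterImage  # path assumed

    target_dict = {
        "values": RasterImage(torch.rand(1, 1, 64, 64), timestamps=None),
        "valid": RasterImage(torch.ones(1, 1, 64, 64), timestamps=None),
    }
    labels = target_dict["values"].get_hw_tensor()   # HW float tensor
    mask = target_dict["valid"].get_hw_tensor() > 0  # HW bool mask
    valid_labels = labels[mask]                      # flattened valid pixels
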
@@ -121,7 +119,7 @@ class PerPixelRegressionTask(BasicTask):
         image = super().visualize(input_dict, target_dict, output)["image"]
         if target_dict is None:
             raise ValueError("target_dict is required for visualization")
-        gt_values = target_dict["classes"].cpu().numpy()
+        gt_values = target_dict["values"].get_hw_tensor().cpu().numpy()
         pred_values = output.cpu().numpy()[0, :, :]
         gt_vis = np.clip(gt_values * 255, 0, 255).astype(np.uint8)
         pred_vis = np.clip(pred_values * 255, 0, 255).astype(np.uint8)
@@ -210,8 +208,10 @@ class PerPixelRegressionHead(Predictor):
 
         losses = {}
         if targets:
-            labels = torch.stack([target["values"] for target in targets])
-            mask = torch.stack([target["valid"] for target in targets])
+            labels = torch.stack(
+                [target["values"].get_hw_tensor() for target in targets]
+            )
+            mask = torch.stack([target["valid"].get_hw_tensor() for target in targets])
 
             if self.loss_mode == "mse":
                 scores = torch.square(outputs - labels)
@@ -262,14 +262,14 @@ class PerPixelRegressionMetricWrapper(Metric):
         """
         if not isinstance(preds, torch.Tensor):
             preds = torch.stack(preds)
-        labels = torch.stack([target["values"] for target in targets])
+        labels = torch.stack([target["values"].get_hw_tensor() for target in targets])
 
         # Sub-select the valid labels.
        # We flatten the prediction and label images at valid pixels.
         if len(preds.shape) == 4:
             assert preds.shape[1] == 1
             preds = preds[:, 0, :, :]
-        mask = torch.stack([target["valid"] > 0 for target in targets])
+        mask = torch.stack([target["valid"].get_hw_tensor() > 0 for target in targets])
         preds = preds[mask]
         labels = labels[mask]
         if len(preds) == 0:

rslearn/train/tasks/regression.py
@@ -130,7 +130,7 @@ class RegressionTask(BasicTask):
         feature = Feature(
             STGeometry(
                 metadata.projection,
-                shapely.Point(metadata.patch_bounds[0], metadata.patch_bounds[1]),
+                shapely.Point(metadata.crop_bounds[0], metadata.crop_bounds[1]),
                 None,
             ),
             {

rslearn/train/tasks/segmentation.py
@@ -128,9 +128,7 @@ class SegmentationTask(BasicTask):
             return {}, {}
 
         assert isinstance(raw_inputs["targets"], RasterImage)
-        assert raw_inputs["targets"].image.shape[0] == 1
-        assert raw_inputs["targets"].image.shape[1] == 1
-        labels = raw_inputs["targets"].image[0, 0, :, :].long()
+        labels = raw_inputs["targets"].get_hw_tensor().long()
 
         if self.class_id_mapping is not None:
             new_labels = labels.clone()
@@ -146,9 +144,11 @@ class SegmentationTask(BasicTask):
         else:
             valid = torch.ones(labels.shape, dtype=torch.float32)
 
+        # Wrap in RasterImage with CTHW format (C=1, T=1) so classes and valid can be
+        # used in image transforms.
         return {}, {
-            "classes": labels,
-            "valid": valid,
+            "classes": RasterImage(labels[None, None, :, :], timestamps=None),
+            "valid": RasterImage(valid[None, None, :, :], timestamps=None),
         }
 
     def process_output(
@@ -206,7 +206,7 @@ class SegmentationTask(BasicTask):
         image = super().visualize(input_dict, target_dict, output)["image"]
         if target_dict is None:
             raise ValueError("target_dict is required for visualization")
-        gt_classes = target_dict["classes"].cpu().numpy()
+        gt_classes = target_dict["classes"].get_hw_tensor().cpu().numpy()
         pred_classes = output.cpu().numpy().argmax(axis=0)
         gt_vis = np.zeros((gt_classes.shape[0], gt_classes.shape[1], 3), dtype=np.uint8)
         pred_vis = np.zeros(
@@ -291,12 +291,19 @@ class SegmentationTask(BasicTask):
 class SegmentationHead(Predictor):
     """Head for segmentation task."""
 
-    def __init__(self, weights: list[float] | None = None, dice_loss: bool = False):
+    def __init__(
+        self,
+        weights: list[float] | None = None,
+        dice_loss: bool = False,
+        temperature: float = 1.0,
+    ):
         """Initialize a new SegmentationTask.
 
         Args:
             weights: weights for cross entropy loss (Tensor of size C)
             dice_loss: weather to add dice loss to cross entropy
+            temperature: temperature scaling for softmax, does not affect the loss,
+                only the predictor outputs
         """
         super().__init__()
         if weights is not None:
@@ -304,6 +311,7 @@ class SegmentationHead(Predictor):
         else:
             self.weights = None
         self.dice_loss = dice_loss
+        self.temperature = temperature
 
     def forward(
         self,
@@ -332,12 +340,16 @@ class SegmentationHead(Predictor):
         )
 
         logits = intermediates.feature_maps[0]
-        outputs = torch.nn.functional.softmax(logits, dim=1)
+        outputs = torch.nn.functional.softmax(logits / self.temperature, dim=1)
 
         losses = {}
         if targets:
-            labels = torch.stack([target["classes"] for target in targets], dim=0)
-            mask = torch.stack([target["valid"] for target in targets], dim=0)
+            labels = torch.stack(
+                [target["classes"].get_hw_tensor() for target in targets], dim=0
+            )
+            mask = torch.stack(
+                [target["valid"].get_hw_tensor() for target in targets], dim=0
+            )
             per_pixel_loss = torch.nn.functional.cross_entropy(
                 logits, labels, weight=self.weights, reduction="none"
             )
@@ -350,7 +362,8 @@ class SegmentationHead(Predictor):
             # the summed mask loss be zero.
             losses["cls"] = torch.sum(per_pixel_loss * mask)
             if self.dice_loss:
-                dice_loss = DiceLoss()(outputs, labels, mask)
+                softmax_woT = torch.nn.functional.softmax(logits, dim=1)
+                dice_loss = DiceLoss()(softmax_woT, labels, mask)
                 losses["dice"] = dice_loss
 
         return ModelOutput(
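
For reference, the temperature only rescales the predicted probabilities as softmax(logits / T); the cross-entropy and dice losses are still computed from the unscaled logits. A small numeric sketch:

    import torch

    logits = torch.tensor([[2.0, 0.0, -2.0]])
    torch.softmax(logits / 1.0, dim=1)  # ~[0.867, 0.117, 0.016]: default, unchanged
    torch.softmax(logits / 2.0, dim=1)  # ~[0.665, 0.245, 0.090]: softer predictions
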
@@ -401,12 +414,12 @@ class SegmentationMetric(Metric):
         """
         if not isinstance(preds, torch.Tensor):
             preds = torch.stack(preds)
-        labels = torch.stack([target["classes"] for target in targets])
+        labels = torch.stack([target["classes"].get_hw_tensor() for target in targets])
 
         # Sub-select the valid labels.
         # We flatten the prediction and label images at valid pixels.
         # Prediction is changed from BCHW to BHWC so we can select the valid BHW mask.
-        mask = torch.stack([target["valid"] > 0 for target in targets])
+        mask = torch.stack([target["valid"].get_hw_tensor() > 0 for target in targets])
         preds = preds.permute(0, 2, 3, 1)[mask]
         labels = labels[mask]
         if len(preds) == 0:

rslearn/train/transforms/concatenate.py
@@ -54,36 +54,26 @@ class Concatenate(Transform):
             target_dict: the target
 
         Returns:
-            concatenated (input_dicts, target_dicts) tuple. If one of the
-            specified inputs is a RasterImage, a RasterImage will be returned.
-            Otherwise it will be a torch.Tensor.
+            (input_dicts, target_dicts) where the entry corresponding to
+            output_selector contains the concatenated RasterImage.
         """
-        images = []
-        return_raster_image: bool = False
+        tensors: list[torch.Tensor] = []
         timestamps: list[tuple[datetime, datetime]] | None = None
+
         for selector, wanted_bands in self.selections.items():
             image = read_selector(input_dict, target_dict, selector)
-            if isinstance(image, torch.Tensor):
-                if wanted_bands:
-                    image = image[wanted_bands, :, :]
-                images.append(image)
-            elif isinstance(image, RasterImage):
-                return_raster_image = True
-                if wanted_bands:
-                    images.append(image.image[wanted_bands, :, :])
-                else:
-                    images.append(image.image)
-                if timestamps is None:
-                    if image.timestamps is not None:
-                        # assume all concatenated modalities have the same
-                        # number of timestamps
-                        timestamps = image.timestamps
-        if return_raster_image:
-            result = RasterImage(
-                torch.concatenate(images, dim=self.concatenate_dim),
-                timestamps=timestamps,
-            )
-        else:
-            result = torch.concatenate(images, dim=self.concatenate_dim)
+            if wanted_bands:
+                tensors.append(image.image[wanted_bands, :, :])
+            else:
+                tensors.append(image.image)
+            if timestamps is None and image.timestamps is not None:
+                # assume all concatenated modalities have the same
+                # number of timestamps
+                timestamps = image.timestamps
+
+        result = RasterImage(
+            torch.concatenate(tensors, dim=self.concatenate_dim),
+            timestamps=timestamps,
+        )
         write_selector(input_dict, target_dict, self.output_selector, result)
         return input_dict, target_dict
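
The simplified contract is that every selected input must now be a RasterImage, and the concatenated result is always a RasterImage. A hedged sketch (constructor argument names inferred from the attributes used above):

    # Concatenate two modalities along the channel dimension; plain torch.Tensor
    # inputs are no longer handled by this transform in 0.0.27.
    transform = Concatenate(
        selections={"sentinel2": [], "sentinel1": []},  # [] keeps all bands
        output_selector="image",
    )
    input_dict, target_dict = transform(input_dict, target_dict)
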

rslearn/train/transforms/crop.py
@@ -71,9 +71,7 @@ class Crop(Transform):
             "remove_from_top": remove_from_top,
         }
 
-    def apply_image(
-        self, image: RasterImage | torch.Tensor, state: dict[str, Any]
-    ) -> RasterImage | torch.Tensor:
+    def apply_image(self, image: RasterImage, state: dict[str, Any]) -> RasterImage:
         """Apply the sampled state on the specified image.
 
         Args:
@@ -84,22 +82,13 @@
         crop_size = state["crop_size"] * image.shape[-1] // image_shape[1]
         remove_from_left = state["remove_from_left"] * image.shape[-1] // image_shape[1]
         remove_from_top = state["remove_from_top"] * image.shape[-2] // image_shape[0]
-        if isinstance(image, RasterImage):
-            image.image = torchvision.transforms.functional.crop(
-                image.image,
-                top=remove_from_top,
-                left=remove_from_left,
-                height=crop_size,
-                width=crop_size,
-            )
-        else:
-            image = torchvision.transforms.functional.crop(
-                image,
-                top=remove_from_top,
-                left=remove_from_left,
-                height=crop_size,
-                width=crop_size,
-            )
+        image.image = torchvision.transforms.functional.crop(
+            image.image,
+            top=remove_from_top,
+            left=remove_from_left,
+            height=crop_size,
+            width=crop_size,
+        )
         return image
 
     def apply_boxes(self, boxes: Any, state: dict[str, bool]) -> torch.Tensor:

rslearn/train/transforms/flip.py
@@ -57,16 +57,10 @@ class Flip(Transform):
             image: the image to transform.
             state: the sampled state.
         """
-        if isinstance(image, RasterImage):
-            if state["horizontal"]:
-                image.image = torch.flip(image.image, dims=[-1])
-            if state["vertical"]:
-                image.image = torch.flip(image.image, dims=[-2])
-        elif isinstance(image, torch.Tensor):
-            if state["horizontal"]:
-                image = torch.flip(image, dims=[-1])
-            if state["vertical"]:
-                image = torch.flip(image, dims=[-2])
+        if state["horizontal"]:
+            image.image = torch.flip(image.image, dims=[-1])
+        if state["vertical"]:
+            image.image = torch.flip(image.image, dims=[-2])
         return image
 
     def apply_boxes(
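
Since Crop and Flip likewise drop their torch.Tensor branches, callers holding bare tensors should wrap them before entering the transform pipeline. A minimal sketch (module path assumed):

    import torch
    from rslearn.train.model_context import RasterImage  # path assumed

    tensor_chw = torch.rand(3, 128, 128)
    # Transforms in 0.0.27 operate on RasterImage in CTHW layout only, so add a
    # singleton time axis to get (C, T, H, W).
    image = RasterImage(tensor_chw[:, None], timestamps=None)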