PyPI - rslearn - Versions diffs - 0.0.16__py3-none-any.whl → 0.0.18__py3-none-any.whl - Mend

rslearn 0.0.16__py3-none-any.whl → 0.0.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67)

rslearn/config/__init__.py +2 -0
rslearn/config/dataset.py +55 -4
rslearn/dataset/add_windows.py +1 -1
rslearn/dataset/dataset.py +9 -65
rslearn/dataset/materialize.py +5 -5
rslearn/dataset/storage/__init__.py +1 -0
rslearn/dataset/storage/file.py +202 -0
rslearn/dataset/storage/storage.py +140 -0
rslearn/dataset/window.py +26 -80
rslearn/lightning_cli.py +10 -3
rslearn/main.py +11 -36
rslearn/models/anysat.py +11 -9
rslearn/models/clay/clay.py +8 -9
rslearn/models/clip.py +18 -15
rslearn/models/component.py +99 -0
rslearn/models/concatenate_features.py +21 -11
rslearn/models/conv.py +15 -8
rslearn/models/croma.py +13 -8
rslearn/models/detr/detr.py +25 -14
rslearn/models/dinov3.py +11 -6
rslearn/models/faster_rcnn.py +19 -9
rslearn/models/feature_center_crop.py +12 -9
rslearn/models/fpn.py +19 -8
rslearn/models/galileo/galileo.py +23 -18
rslearn/models/module_wrapper.py +26 -57
rslearn/models/molmo.py +16 -14
rslearn/models/multitask.py +102 -73
rslearn/models/olmoearth_pretrain/model.py +20 -17
rslearn/models/panopticon.py +8 -7
rslearn/models/pick_features.py +18 -24
rslearn/models/pooling_decoder.py +22 -14
rslearn/models/presto/presto.py +16 -10
rslearn/models/presto/single_file_presto.py +4 -10
rslearn/models/prithvi.py +12 -8
rslearn/models/resize_features.py +21 -7
rslearn/models/sam2_enc.py +11 -9
rslearn/models/satlaspretrain.py +15 -9
rslearn/models/simple_time_series.py +31 -17
rslearn/models/singletask.py +24 -17
rslearn/models/ssl4eo_s12.py +15 -10
rslearn/models/swin.py +22 -13
rslearn/models/terramind.py +24 -7
rslearn/models/trunk.py +6 -3
rslearn/models/unet.py +18 -9
rslearn/models/upsample.py +22 -9
rslearn/train/all_patches_dataset.py +22 -18
rslearn/train/dataset.py +69 -54
rslearn/train/lightning_module.py +51 -32
rslearn/train/model_context.py +54 -0
rslearn/train/prediction_writer.py +111 -41
rslearn/train/tasks/classification.py +34 -15
rslearn/train/tasks/detection.py +24 -31
rslearn/train/tasks/embedding.py +33 -29
rslearn/train/tasks/multi_task.py +7 -7
rslearn/train/tasks/per_pixel_regression.py +41 -19
rslearn/train/tasks/regression.py +38 -21
rslearn/train/tasks/segmentation.py +33 -15
rslearn/train/tasks/task.py +3 -2
{rslearn-0.0.16.dist-info → rslearn-0.0.18.dist-info}/METADATA +58 -25
{rslearn-0.0.16.dist-info → rslearn-0.0.18.dist-info}/RECORD +65 -62
rslearn/dataset/index.py +0 -173
rslearn/models/registry.py +0 -22
{rslearn-0.0.16.dist-info → rslearn-0.0.18.dist-info}/WHEEL +0 -0
{rslearn-0.0.16.dist-info → rslearn-0.0.18.dist-info}/entry_points.txt +0 -0
{rslearn-0.0.16.dist-info → rslearn-0.0.18.dist-info}/licenses/LICENSE +0 -0
{rslearn-0.0.16.dist-info → rslearn-0.0.18.dist-info}/licenses/NOTICE +0 -0
{rslearn-0.0.16.dist-info → rslearn-0.0.18.dist-info}/top_level.txt +0 -0

rslearn/train/tasks/detection.py CHANGED Viewed

@@ -12,6 +12,7 @@ import torchmetrics.classification
 import torchvision
 from torchmetrics import Metric, MetricCollection
+from rslearn.train.model_context import SampleMetadata
 from rslearn.utils import Feature, STGeometry
 from .task import BasicTask
@@ -127,7 +128,7 @@ class DetectionTask(BasicTask):
     def process_inputs(
         self,
         raw_inputs: dict[str, torch.Tensor | list[Feature]],
-        metadata: dict[str, Any],
+        metadata: SampleMetadata,
         load_targets: bool = True,
     ) -> tuple[dict[str, Any], dict[str, Any]]:
         """Processes the data into targets.
@@ -144,6 +145,8 @@ class DetectionTask(BasicTask):
         if not load_targets:
             return {}, {}
+        bounds = metadata.patch_bounds
         boxes = []
         class_labels = []
         valid = 1
@@ -186,39 +189,33 @@ class DetectionTask(BasicTask):
             else:
                 box = [int(val) for val in shp.bounds]
-            if box[0] >= metadata["bounds"][2] or box[2] <= metadata["bounds"][0]:
+            if box[0] >= bounds[2] or box[2] <= bounds[0]:
                 continue
-            if box[1] >= metadata["bounds"][3] or box[3] <= metadata["bounds"][1]:
+            if box[1] >= bounds[3] or box[3] <= bounds[1]:
                 continue
             if self.exclude_by_center:
                 center_col = (box[0] + box[2]) // 2
                 center_row = (box[1] + box[3]) // 2
-                if (
-                    center_col <= metadata["bounds"][0]
-                    or center_col >= metadata["bounds"][2]
-                ):
+                if center_col <= bounds[0] or center_col >= bounds[2]:
                     continue
-                if (
-                    center_row <= metadata["bounds"][1]
-                    or center_row >= metadata["bounds"][3]
-                ):
+                if center_row <= bounds[1] or center_row >= bounds[3]:
                     continue
             if self.clip_boxes:
                 box = [
-                    np.clip(box[0], metadata["bounds"][0], metadata["bounds"][2]),
-                    np.clip(box[1], metadata["bounds"][1], metadata["bounds"][3]),
-                    np.clip(box[2], metadata["bounds"][0], metadata["bounds"][2]),
-                    np.clip(box[3], metadata["bounds"][1], metadata["bounds"][3]),
+                    np.clip(box[0], bounds[0], bounds[2]),
+                    np.clip(box[1], bounds[1], bounds[3]),
+                    np.clip(box[2], bounds[0], bounds[2]),
+                    np.clip(box[3], bounds[1], bounds[3]),
                 ]
             # Convert to relative coordinates.
             box = [
-                box[0] - metadata["bounds"][0],
-                box[1] - metadata["bounds"][1],
-                box[2] - metadata["bounds"][0],
-                box[3] - metadata["bounds"][1],
+                box[0] - bounds[0],
+                box[1] - bounds[1],
+                box[2] - bounds[0],
+                box[3] - bounds[1],
             ]
             boxes.append(box)
@@ -238,16 +235,12 @@ class DetectionTask(BasicTask):
             "valid": torch.tensor(valid, dtype=torch.int32),
             "boxes": boxes,
             "labels": class_labels,
-            "width": torch.tensor(
-                metadata["bounds"][2] - metadata["bounds"][0], dtype=torch.float32
-            ),
-            "height": torch.tensor(
-                metadata["bounds"][3] - metadata["bounds"][1], dtype=torch.float32
-            ),
+            "width": torch.tensor(bounds[2] - bounds[0], dtype=torch.float32),
+            "height": torch.tensor(bounds[3] - bounds[1], dtype=torch.float32),
         }
     def process_output(
-        self, raw_output: Any, metadata: dict[str, Any]
+        self, raw_output: Any, metadata: SampleMetadata
     ) -> npt.NDArray[Any] | list[Feature]:
         """Processes an output into raster or vector data.
@@ -267,12 +260,12 @@ class DetectionTask(BasicTask):
         features = []
         for box, class_id, score in zip(boxes, class_ids, scores):
             shp = shapely.box(
-                metadata["bounds"][0] + float(box[0]),
-                metadata["bounds"][1] + float(box[1]),
-                metadata["bounds"][0] + float(box[2]),
-                metadata["bounds"][1] + float(box[3]),
+                metadata.patch_bounds[0] + float(box[0]),
+                metadata.patch_bounds[1] + float(box[1]),
+                metadata.patch_bounds[0] + float(box[2]),
+                metadata.patch_bounds[1] + float(box[3]),
             )
-            geom = STGeometry(metadata["projection"], shp, None)
+            geom = STGeometry(metadata.projection, shp, None)
             properties: dict[str, Any] = {
                 "score": float(score),
             }

rslearn/train/tasks/embedding.py CHANGED Viewed

@@ -6,6 +6,8 @@ import numpy.typing as npt
 import torch
 from torchmetrics import MetricCollection
+from rslearn.models.component import FeatureMaps
+from rslearn.train.model_context import ModelContext, ModelOutput, SampleMetadata
 from rslearn.utils import Feature
 from .task import Task
@@ -21,7 +23,7 @@ class EmbeddingTask(Task):
     def process_inputs(
         self,
         raw_inputs: dict[str, torch.Tensor],
-        metadata: dict[str, Any],
+        metadata: SampleMetadata,
         load_targets: bool = True,
     ) -> tuple[dict[str, Any], dict[str, Any]]:
         """Processes the data into targets.
@@ -38,17 +40,22 @@ class EmbeddingTask(Task):
         return {}, {}
     def process_output(
-        self, raw_output: Any, metadata: dict[str, Any]
+        self, raw_output: Any, metadata: SampleMetadata
     ) -> npt.NDArray[Any] | list[Feature]:
         """Processes an output into raster or vector data.
         Args:
-            raw_output: the output from prediction head.
+            raw_output: the output from prediction head, which must be a CxHxW tensor.
             metadata: metadata about the patch being read
         Returns:
             either raster or vector data.
         """
+        if not isinstance(raw_output, torch.Tensor) or len(raw_output.shape) != 3:
+            raise ValueError(
+                "output for EmbeddingTask must be a tensor with three dimensions"
+            )
         # Just convert the raw output to numpy array that can be saved to GeoTIFF.
         return raw_output.cpu().numpy()
@@ -76,41 +83,38 @@ class EmbeddingTask(Task):
         return MetricCollection({})
-class EmbeddingHead(torch.nn.Module):
+class EmbeddingHead:
     """Head for embedding task.
-    This picks one feature map from the input list of feature maps to output. It also
-    returns a dummy loss.
+    It just adds a dummy loss to act as a Predictor.
     """
-    def __init__(self, feature_map_index: int | None = 0):
-        """Create a new EmbeddingHead.
-        Args:
-            feature_map_index: the index of the feature map to choose from the input
-                list of multi-scale feature maps (default 0). If the input is already
-                a single feature map, then set to None.
-        """
-        super().__init__()
-        self.feature_map_index = feature_map_index
     def forward(
         self,
-        features: torch.Tensor,
-        inputs: list[dict[str, Any]],
+        intermediates: Any,
+        context: ModelContext,
         targets: list[dict[str, Any]] | None = None,
-    ) -> tuple[torch.Tensor, dict[str, Any]]:
-        """Select the desired feature map and return it along with a dummy loss.
+    ) -> ModelOutput:
+        """Return the feature map along with a dummy loss.
         Args:
-            features: list of BCHW feature maps (or one feature map, if feature_map_index is None).
-            inputs: original inputs (ignored).
-            targets: should contain classes key that stores the per-pixel class labels.
+            intermediates: output from the previous model component, which must be a
+                FeatureMaps consisting of a single feature map.
+            context: the model context.
+            targets: the targets (ignored).
         Returns:
-            tuple of outputs and loss dict
+            model output with the feature map that was input to this component along
+                with a dummy loss.
         """
-        if self.feature_map_index is not None:
-            features = features[self.feature_map_index]
-        return features, {"loss": 0}
+        if not isinstance(intermediates, FeatureMaps):
+            raise ValueError("input to EmbeddingHead must be a FeatureMaps")
+        if len(intermediates.feature_maps) != 1:
+            raise ValueError(
+                f"input to EmbeddingHead must have one feature map, but got {len(intermediates.feature_maps)}"
+            )
+        return ModelOutput(
+            outputs=intermediates.feature_maps[0],
+            loss_dict={"loss": 0},
+        )

rslearn/train/tasks/multi_task.py CHANGED Viewed

@@ -6,6 +6,7 @@ import numpy.typing as npt
 import torch
 from torchmetrics import Metric, MetricCollection
+from rslearn.train.model_context import SampleMetadata
 from rslearn.utils import Feature
 from .task import Task
@@ -30,7 +31,7 @@ class MultiTask(Task):
     def process_inputs(
         self,
         raw_inputs: dict[str, torch.Tensor | list[Feature]],
-        metadata: dict[str, Any],
+        metadata: SampleMetadata,
         load_targets: bool = True,
     ) -> tuple[dict[str, Any], dict[str, Any]]:
         """Processes the data into targets.
@@ -46,14 +47,12 @@ class MultiTask(Task):
         """
         input_dict = {}
         target_dict = {}
-        if metadata["dataset_source"] is None:
+        if metadata.dataset_source is None:
             # No multi-dataset, so always compute across all tasks
             task_iter = list(self.tasks.items())
         else:
             # Multi-dataset, so only compute for the task in this dataset
-            task_iter = [
-                (metadata["dataset_source"], self.tasks[metadata["dataset_source"]])
-            ]
+            task_iter = [(metadata.dataset_source, self.tasks[metadata.dataset_source])]
         for task_name, task in task_iter:
             cur_raw_inputs = {}
@@ -71,12 +70,13 @@ class MultiTask(Task):
         return input_dict, target_dict
     def process_output(
-        self, raw_output: Any, metadata: dict[str, Any]
+        self, raw_output: Any, metadata: SampleMetadata
     ) -> dict[str, Any]:
         """Processes an output into raster or vector data.
         Args:
-            raw_output: the output from prediction head.
+            raw_output: the output from prediction head. It must be a dict mapping from
+                task name to per-task output for this sample.
             metadata: metadata about the patch being read
         Returns:

rslearn/train/tasks/per_pixel_regression.py CHANGED Viewed

@@ -8,6 +8,8 @@ import torch
 import torchmetrics
 from torchmetrics import Metric, MetricCollection
+from rslearn.models.component import FeatureMaps, Predictor
+from rslearn.train.model_context import ModelContext, ModelOutput, SampleMetadata
 from rslearn.utils.feature import Feature
 from .task import BasicTask
@@ -41,7 +43,7 @@ class PerPixelRegressionTask(BasicTask):
     def process_inputs(
         self,
         raw_inputs: dict[str, torch.Tensor],
-        metadata: dict[str, Any],
+        metadata: SampleMetadata,
         load_targets: bool = True,
     ) -> tuple[dict[str, Any], dict[str, Any]]:
         """Processes the data into targets.
@@ -72,20 +74,23 @@ class PerPixelRegressionTask(BasicTask):
         }
     def process_output(
-        self, raw_output: Any, metadata: dict[str, Any]
+        self, raw_output: Any, metadata: SampleMetadata
     ) -> npt.NDArray[Any] | list[Feature]:
         """Processes an output into raster or vector data.
         Args:
-            raw_output: the output from prediction head.
+            raw_output: the output from prediction head, which must be an HW tensor.
             metadata: metadata about the patch being read
         Returns:
             either raster or vector data.
         """
-        # Input could be CHW (with single channel) or just HW.
-        if len(raw_output.shape) == 2:
-            raw_output = raw_output[None, :, :]
+        if not isinstance(raw_output, torch.Tensor):
+            raise ValueError("output for PerPixelRegressionTask must be a tensor")
+        if len(raw_output.shape) != 2:
+            raise ValueError(
+                f"PerPixelRegressionTask output must be an HW tensor, but got shape {raw_output.shape}"
+            )
         return (raw_output / self.scale_factor).cpu().numpy()
     def visualize(
@@ -133,7 +138,7 @@ class PerPixelRegressionTask(BasicTask):
         return MetricCollection(metric_dict)
-class PerPixelRegressionHead(torch.nn.Module):
+class PerPixelRegressionHead(Predictor):
     """Head for per-pixel regression task."""
     def __init__(
@@ -156,24 +161,38 @@ class PerPixelRegressionHead(torch.nn.Module):
     def forward(
         self,
-        logits: torch.Tensor,
-        inputs: list[dict[str, Any]],
+        intermediates: Any,
+        context: ModelContext,
         targets: list[dict[str, Any]] | None = None,
-    ) -> tuple[torch.Tensor, dict[str, Any]]:
+    ) -> ModelOutput:
         """Compute the regression outputs and loss from logits and targets.
         Args:
-            logits: BxHxW or BxCxHxW tensor.
-            inputs: original inputs (ignored).
-            targets: should contain target key that stores the regression labels.
+            intermediates: output from previous component, which must be a FeatureMaps
+                with one feature map corresponding to the logits. The channel dimension
+                size must be 1.
+            context: the model context.
+            targets: must contain values key that stores the regression labels, and
+                valid key containing mask image indicating where the labels are valid.
         Returns:
-            tuple of outputs and loss dict
+            tuple of outputs and loss dict. The output is a BHW tensor so that the
+                per-sample output is an HW tensor.
         """
-        assert len(logits.shape) in [3, 4]
-        if len(logits.shape) == 4:
-            assert logits.shape[1] == 1
-            logits = logits[:, 0, :, :]
+        if not isinstance(intermediates, FeatureMaps):
+            raise ValueError(
+                "the input to PerPixelRegressionHead must be a FeatureMaps"
+            )
+        if len(intermediates.feature_maps) != 1:
+            raise ValueError(
+                "the input to PerPixelRegressionHead must have one feature map"
+            )
+        if intermediates.feature_maps[0].shape[1] != 1:
+            raise ValueError(
+                f"the input to PerPixelRegressionHead must have channel dimension size 1, but got {intermediates.feature_maps[0].shape}"
+            )
+        logits = intermediates.feature_maps[0][:, 0, :, :]
         if self.use_sigmoid:
             outputs = torch.nn.functional.sigmoid(logits)
@@ -200,7 +219,10 @@ class PerPixelRegressionHead(torch.nn.Module):
             else:
                 losses["regress"] = (scores * mask).sum() / mask_total
-        return outputs, losses
+        return ModelOutput(
+            outputs=outputs,
+            loss_dict=losses,
+        )
 class PerPixelRegressionMetricWrapper(Metric):

rslearn/train/tasks/regression.py CHANGED Viewed

@@ -10,6 +10,8 @@ import torchmetrics
 from PIL import Image, ImageDraw
 from torchmetrics import Metric, MetricCollection
+from rslearn.models.component import FeatureVector, Predictor
+from rslearn.train.model_context import ModelContext, ModelOutput, SampleMetadata
 from rslearn.utils.feature import Feature
 from rslearn.utils.geometry import STGeometry
@@ -62,7 +64,7 @@ class RegressionTask(BasicTask):
     def process_inputs(
         self,
         raw_inputs: dict[str, torch.Tensor | list[Feature]],
-        metadata: dict[str, Any],
+        metadata: SampleMetadata,
         load_targets: bool = True,
     ) -> tuple[dict[str, Any], dict[str, Any]]:
         """Processes the data into targets.
@@ -103,22 +105,26 @@ class RegressionTask(BasicTask):
         }
     def process_output(
-        self, raw_output: Any, metadata: dict[str, Any]
-    ) -> npt.NDArray[Any] | list[Feature]:
+        self, raw_output: Any, metadata: SampleMetadata
+    ) -> list[Feature]:
         """Processes an output into raster or vector data.
         Args:
-            raw_output: the output from prediction head.
+            raw_output: the output from prediction head, which must be a scalar tensor.
             metadata: metadata about the patch being read
         Returns:
-            either raster or vector data.
+            a list with a single Feature corresponding to the patch extent and with a
+                property containing the predicted value.
         """
+        if not isinstance(raw_output, torch.Tensor) or len(raw_output.shape) != 0:
+            raise ValueError("output for RegressionTask must be a scalar Tensor")
         output = raw_output.item() / self.scale_factor
         feature = Feature(
             STGeometry(
-                metadata["projection"],
-                shapely.Point(metadata["bounds"][0], metadata["bounds"][1]),
+                metadata.projection,
+                shapely.Point(metadata.patch_bounds[0], metadata.patch_bounds[1]),
                 None,
             ),
             {
@@ -180,7 +186,7 @@ class RegressionTask(BasicTask):
         return MetricCollection(metric_dict)
-class RegressionHead(torch.nn.Module):
+class RegressionHead(Predictor):
     """Head for regression task."""
     def __init__(
@@ -199,24 +205,32 @@ class RegressionHead(torch.nn.Module):
     def forward(
         self,
-        logits: torch.Tensor,
-        inputs: list[dict[str, Any]],
+        intermediates: Any,
+        context: ModelContext,
         targets: list[dict[str, Any]] | None = None,
-    ) -> tuple[torch.Tensor, dict[str, Any]]:
+    ) -> ModelOutput:
         """Compute the regression outputs and loss from logits and targets.
         Args:
-            logits: tensor that is (BatchSize, 1) or (BatchSize) in shape.
-            inputs: original inputs (ignored).
-            targets: should contain target key that stores the regression label.
+            intermediates: output from previous model component, which must be a
+                FeatureVector with channel dimension size 1 (Bx1).
+            context: the model context.
+            targets: target dicts, which each must contain a "value" key containing the
+                regression label, along with a "valid" key containing a flag indicating
+                whether each example is valid for this task.
         Returns:
-            tuple of outputs and loss dict
+            the model outputs. The output is a B tensor so that it is split up into a
+                scalar for each example.
         """
-        assert len(logits.shape) in [1, 2]
-        if len(logits.shape) == 2:
-            assert logits.shape[1] == 1
-            logits = logits[:, 0]
+        if not isinstance(intermediates, FeatureVector):
+            raise ValueError("the input to RegressionHead must be a FeatureVector")
+        if intermediates.feature_vector.shape[1] != 1:
+            raise ValueError(
+                f"the input to RegressionHead must have channel dimension size 1, but got shape {intermediates.feature_vector.shape}"
+            )
+        logits = intermediates.feature_vector[:, 0]
         if self.use_sigmoid:
             outputs = torch.nn.functional.sigmoid(logits)
@@ -232,9 +246,12 @@ class RegressionHead(torch.nn.Module):
             elif self.loss_mode == "l1":
                 losses["regress"] = torch.mean(torch.abs(outputs - labels) * mask)
             else:
-                assert False
+                raise ValueError(f"unknown loss mode {self.loss_mode}")
-        return outputs, losses
+        return ModelOutput(
+            outputs=outputs,
+            loss_dict=losses,
+        )
 class RegressionMetricWrapper(Metric):

rslearn/train/tasks/segmentation.py CHANGED Viewed

@@ -8,7 +8,8 @@ import torch
 import torchmetrics.classification
 from torchmetrics import Metric, MetricCollection
-from rslearn.utils import Feature
+from rslearn.models.component import FeatureMaps, Predictor
+from rslearn.train.model_context import ModelContext, ModelOutput, SampleMetadata
 from .task import BasicTask
@@ -108,7 +109,7 @@ class SegmentationTask(BasicTask):
     def process_inputs(
         self,
         raw_inputs: dict[str, torch.Tensor],
-        metadata: dict[str, Any],
+        metadata: SampleMetadata,
         load_targets: bool = True,
     ) -> tuple[dict[str, Any], dict[str, Any]]:
         """Processes the data into targets.
@@ -148,17 +149,20 @@ class SegmentationTask(BasicTask):
         }
     def process_output(
-        self, raw_output: Any, metadata: dict[str, Any]
-    ) -> npt.NDArray[Any] | list[Feature]:
+        self, raw_output: Any, metadata: SampleMetadata
+    ) -> npt.NDArray[Any]:
         """Processes an output into raster or vector data.
         Args:
-            raw_output: the output from prediction head.
+            raw_output: the output from prediction head, which must be a CHW tensor.
             metadata: metadata about the patch being read
         Returns:
-            either raster or vector data.
+            CHW numpy array with one channel, containing the predicted class IDs.
         """
+        if not isinstance(raw_output, torch.Tensor) or len(raw_output.shape) != 3:
+            raise ValueError("the output for SegmentationTask must be a CHW tensor")
         if self.prob_scales is not None:
             raw_output = (
                 raw_output
@@ -166,7 +170,7 @@ class SegmentationTask(BasicTask):
                     self.prob_scales, device=raw_output.device, dtype=raw_output.dtype
                 )[:, None, None]
             )
-        classes = raw_output.argmax(dim=0).cpu().numpy().astype(np.uint8)
+        classes = raw_output.argmax(dim=0).cpu().numpy()
         return classes[None, :, :]
     def visualize(
@@ -258,25 +262,36 @@ class SegmentationTask(BasicTask):
         return MetricCollection(metrics)
-class SegmentationHead(torch.nn.Module):
+class SegmentationHead(Predictor):
     """Head for segmentation task."""
     def forward(
         self,
-        logits: torch.Tensor,
-        inputs: list[dict[str, Any]],
+        intermediates: Any,
+        context: ModelContext,
         targets: list[dict[str, Any]] | None = None,
-    ) -> tuple[torch.Tensor, dict[str, Any]]:
+    ) -> ModelOutput:
         """Compute the segmentation outputs from logits and targets.
         Args:
-            logits: tensor that is (BatchSize, NumClasses, Height, Width) in shape.
-            inputs: original inputs (ignored).
-            targets: should contain classes key that stores the per-pixel class labels.
+            intermediates: a FeatureMaps with a single feature map containing the
+                segmentation logits.
+            context: the model context
+            targets: list of target dicts, where each target dict must contain a key
+                "classes" containing the per-pixel class labels, along with "valid"
+                containing a mask indicating where the example is valid.
         Returns:
             tuple of outputs and loss dict
         """
+        if not isinstance(intermediates, FeatureMaps):
+            raise ValueError("input to SegmentationHead must be a FeatureMaps")
+        if len(intermediates.feature_maps) != 1:
+            raise ValueError(
+                f"input to SegmentationHead must have one feature map, but got {len(intermediates.feature_maps)}"
+            )
+        logits = intermediates.feature_maps[0]
         outputs = torch.nn.functional.softmax(logits, dim=1)
         losses = {}
@@ -295,7 +310,10 @@ class SegmentationHead(torch.nn.Module):
                 # the summed mask loss be zero.
                 losses["cls"] = torch.sum(per_pixel_loss * mask)
-        return outputs, losses
+        return ModelOutput(
+            outputs=outputs,
+            loss_dict=losses,
+        )
 class SegmentationMetric(Metric):

rslearn/train/tasks/task.py CHANGED Viewed

@@ -7,6 +7,7 @@ import numpy.typing as npt
 import torch
 from torchmetrics import MetricCollection
+from rslearn.train.model_context import SampleMetadata
 from rslearn.utils import Feature
@@ -21,7 +22,7 @@ class Task:
     def process_inputs(
         self,
         raw_inputs: dict[str, torch.Tensor | list[Feature]],
-        metadata: dict[str, Any],
+        metadata: SampleMetadata,
         load_targets: bool = True,
     ) -> tuple[dict[str, Any], dict[str, Any]]:
         """Processes the data into targets.
@@ -38,7 +39,7 @@ class Task:
         raise NotImplementedError
     def process_output(
-        self, raw_output: Any, metadata: dict[str, Any]
+        self, raw_output: Any, metadata: SampleMetadata
     ) -> npt.NDArray[Any] | list[Feature] | dict[str, Any]:
         """Processes an output into raster or vector data.

rslearn 0.0.16__py3-none-any.whl → 0.0.18__py3-none-any.whl

rslearn 0.0.16__py3-none-any.whl → 0.0.18__py3-none-any.whl