rslearn 0.0.1__py3-none-any.whl → 0.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rslearn/arg_parser.py +31 -0
- rslearn/config/__init__.py +6 -12
- rslearn/config/dataset.py +520 -401
- rslearn/const.py +9 -15
- rslearn/data_sources/__init__.py +8 -23
- rslearn/data_sources/aws_landsat.py +242 -98
- rslearn/data_sources/aws_open_data.py +111 -151
- rslearn/data_sources/aws_sentinel1.py +131 -0
- rslearn/data_sources/climate_data_store.py +471 -0
- rslearn/data_sources/copernicus.py +884 -12
- rslearn/data_sources/data_source.py +43 -12
- rslearn/data_sources/earthdaily.py +484 -0
- rslearn/data_sources/earthdata_srtm.py +282 -0
- rslearn/data_sources/eurocrops.py +242 -0
- rslearn/data_sources/gcp_public_data.py +578 -222
- rslearn/data_sources/google_earth_engine.py +461 -135
- rslearn/data_sources/local_files.py +219 -150
- rslearn/data_sources/openstreetmap.py +51 -89
- rslearn/data_sources/planet.py +24 -60
- rslearn/data_sources/planet_basemap.py +275 -0
- rslearn/data_sources/planetary_computer.py +798 -0
- rslearn/data_sources/usda_cdl.py +195 -0
- rslearn/data_sources/usgs_landsat.py +115 -83
- rslearn/data_sources/utils.py +249 -61
- rslearn/data_sources/vector_source.py +1 -0
- rslearn/data_sources/worldcereal.py +449 -0
- rslearn/data_sources/worldcover.py +144 -0
- rslearn/data_sources/worldpop.py +153 -0
- rslearn/data_sources/xyz_tiles.py +150 -107
- rslearn/dataset/__init__.py +8 -2
- rslearn/dataset/add_windows.py +2 -2
- rslearn/dataset/dataset.py +40 -51
- rslearn/dataset/handler_summaries.py +131 -0
- rslearn/dataset/manage.py +313 -74
- rslearn/dataset/materialize.py +431 -107
- rslearn/dataset/remap.py +29 -4
- rslearn/dataset/storage/__init__.py +1 -0
- rslearn/dataset/storage/file.py +202 -0
- rslearn/dataset/storage/storage.py +140 -0
- rslearn/dataset/window.py +181 -44
- rslearn/lightning_cli.py +454 -0
- rslearn/log_utils.py +24 -0
- rslearn/main.py +384 -181
- rslearn/models/anysat.py +215 -0
- rslearn/models/attention_pooling.py +177 -0
- rslearn/models/clay/clay.py +231 -0
- rslearn/models/clay/configs/metadata.yaml +295 -0
- rslearn/models/clip.py +68 -0
- rslearn/models/component.py +111 -0
- rslearn/models/concatenate_features.py +103 -0
- rslearn/models/conv.py +63 -0
- rslearn/models/croma.py +306 -0
- rslearn/models/detr/__init__.py +5 -0
- rslearn/models/detr/box_ops.py +103 -0
- rslearn/models/detr/detr.py +504 -0
- rslearn/models/detr/matcher.py +107 -0
- rslearn/models/detr/position_encoding.py +114 -0
- rslearn/models/detr/transformer.py +429 -0
- rslearn/models/detr/util.py +24 -0
- rslearn/models/dinov3.py +177 -0
- rslearn/models/faster_rcnn.py +30 -28
- rslearn/models/feature_center_crop.py +53 -0
- rslearn/models/fpn.py +19 -8
- rslearn/models/galileo/__init__.py +5 -0
- rslearn/models/galileo/galileo.py +595 -0
- rslearn/models/galileo/single_file_galileo.py +1678 -0
- rslearn/models/module_wrapper.py +65 -0
- rslearn/models/molmo.py +69 -0
- rslearn/models/multitask.py +384 -28
- rslearn/models/olmoearth_pretrain/__init__.py +1 -0
- rslearn/models/olmoearth_pretrain/model.py +421 -0
- rslearn/models/olmoearth_pretrain/norm.py +86 -0
- rslearn/models/panopticon.py +170 -0
- rslearn/models/panopticon_data/sensors/drone.yaml +32 -0
- rslearn/models/panopticon_data/sensors/enmap.yaml +904 -0
- rslearn/models/panopticon_data/sensors/goes.yaml +9 -0
- rslearn/models/panopticon_data/sensors/himawari.yaml +9 -0
- rslearn/models/panopticon_data/sensors/intuition.yaml +606 -0
- rslearn/models/panopticon_data/sensors/landsat8.yaml +84 -0
- rslearn/models/panopticon_data/sensors/modis_terra.yaml +99 -0
- rslearn/models/panopticon_data/sensors/qb2_ge1.yaml +34 -0
- rslearn/models/panopticon_data/sensors/sentinel1.yaml +85 -0
- rslearn/models/panopticon_data/sensors/sentinel2.yaml +97 -0
- rslearn/models/panopticon_data/sensors/superdove.yaml +60 -0
- rslearn/models/panopticon_data/sensors/wv23.yaml +63 -0
- rslearn/models/pick_features.py +17 -10
- rslearn/models/pooling_decoder.py +60 -7
- rslearn/models/presto/__init__.py +5 -0
- rslearn/models/presto/presto.py +297 -0
- rslearn/models/presto/single_file_presto.py +926 -0
- rslearn/models/prithvi.py +1147 -0
- rslearn/models/resize_features.py +59 -0
- rslearn/models/sam2_enc.py +13 -9
- rslearn/models/satlaspretrain.py +38 -18
- rslearn/models/simple_time_series.py +188 -77
- rslearn/models/singletask.py +24 -13
- rslearn/models/ssl4eo_s12.py +40 -30
- rslearn/models/swin.py +44 -32
- rslearn/models/task_embedding.py +250 -0
- rslearn/models/terramind.py +256 -0
- rslearn/models/trunk.py +139 -0
- rslearn/models/unet.py +68 -22
- rslearn/models/upsample.py +48 -0
- rslearn/models/use_croma.py +508 -0
- rslearn/template_params.py +26 -0
- rslearn/tile_stores/__init__.py +41 -18
- rslearn/tile_stores/default.py +409 -0
- rslearn/tile_stores/tile_store.py +236 -132
- rslearn/train/all_patches_dataset.py +530 -0
- rslearn/train/callbacks/adapters.py +53 -0
- rslearn/train/callbacks/freeze_unfreeze.py +348 -17
- rslearn/train/callbacks/gradients.py +129 -0
- rslearn/train/callbacks/peft.py +116 -0
- rslearn/train/data_module.py +444 -20
- rslearn/train/dataset.py +588 -235
- rslearn/train/lightning_module.py +192 -62
- rslearn/train/model_context.py +88 -0
- rslearn/train/optimizer.py +31 -0
- rslearn/train/prediction_writer.py +319 -84
- rslearn/train/scheduler.py +92 -0
- rslearn/train/tasks/classification.py +55 -28
- rslearn/train/tasks/detection.py +132 -76
- rslearn/train/tasks/embedding.py +120 -0
- rslearn/train/tasks/multi_task.py +28 -14
- rslearn/train/tasks/per_pixel_regression.py +291 -0
- rslearn/train/tasks/regression.py +161 -44
- rslearn/train/tasks/segmentation.py +428 -53
- rslearn/train/tasks/task.py +6 -5
- rslearn/train/transforms/__init__.py +1 -1
- rslearn/train/transforms/concatenate.py +54 -10
- rslearn/train/transforms/crop.py +29 -11
- rslearn/train/transforms/flip.py +18 -6
- rslearn/train/transforms/mask.py +78 -0
- rslearn/train/transforms/normalize.py +101 -17
- rslearn/train/transforms/pad.py +19 -7
- rslearn/train/transforms/resize.py +83 -0
- rslearn/train/transforms/select_bands.py +76 -0
- rslearn/train/transforms/sentinel1.py +75 -0
- rslearn/train/transforms/transform.py +89 -70
- rslearn/utils/__init__.py +2 -6
- rslearn/utils/array.py +8 -6
- rslearn/utils/feature.py +2 -2
- rslearn/utils/fsspec.py +90 -1
- rslearn/utils/geometry.py +347 -7
- rslearn/utils/get_utm_ups_crs.py +2 -3
- rslearn/utils/grid_index.py +5 -5
- rslearn/utils/jsonargparse.py +178 -0
- rslearn/utils/mp.py +4 -3
- rslearn/utils/raster_format.py +268 -116
- rslearn/utils/rtree_index.py +64 -17
- rslearn/utils/sqlite_index.py +7 -1
- rslearn/utils/vector_format.py +252 -97
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/METADATA +532 -283
- rslearn-0.0.21.dist-info/RECORD +167 -0
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/WHEEL +1 -1
- rslearn-0.0.21.dist-info/licenses/NOTICE +115 -0
- rslearn/data_sources/raster_source.py +0 -309
- rslearn/models/registry.py +0 -5
- rslearn/tile_stores/file.py +0 -242
- rslearn/utils/mgrs.py +0 -24
- rslearn/utils/utils.py +0 -22
- rslearn-0.0.1.dist-info/RECORD +0 -88
- /rslearn/{data_sources/geotiff.py → py.typed} +0 -0
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/entry_points.txt +0 -0
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info/licenses}/LICENSE +0 -0
- {rslearn-0.0.1.dist-info → rslearn-0.0.21.dist-info}/top_level.txt +0 -0
rslearn/models/ssl4eo_s12.py CHANGED

@@ -1,20 +1,22 @@
 """SSL4EO-S12 models."""
 
-from typing import Any
-
 import torch
 import torchvision
 
+from rslearn.train.model_context import ModelContext
+
+from .component import FeatureExtractor, FeatureMaps
+
 
-class Ssl4eoS12(torch.nn.Module):
+class Ssl4eoS12(FeatureExtractor):
     """The SSL4EO-S12 family of pretrained models."""
 
     def __init__(
         self,
-        backbone_ckpt_path: str,
+        backbone_ckpt_path: str | None,
         arch: str = "resnet50",
         output_layers: list[int] = [0, 1, 2, 3],
-    ):
+    ) -> None:
         """Instantiate a new Swin instance.
 
         Args:
@@ -37,21 +39,24 @@ class Ssl4eoS12(torch.nn.Module):
         else:
             raise ValueError(f"unknown SSL4EO-S12 architecture {arch}")
 
-        [old lines 40-50 removed; content not captured in this diff view]
-                f"warning: got missing_keys={missing_keys}, unexpected_keys={unexpected_keys} when loading SSL4EO-S12 state dict"
+        if backbone_ckpt_path is not None:
+            state_dict = torch.load(backbone_ckpt_path, weights_only=True)
+            state_dict = state_dict["teacher"]
+            prefix = "module.backbone."
+            state_dict = {
+                k[len(prefix) :]: v
+                for k, v in state_dict.items()
+                if k.startswith(prefix)
+            }
+            missing_keys, unexpected_keys = self.model.load_state_dict(
+                state_dict, strict=False
             )
+            if missing_keys or unexpected_keys:
+                print(
+                    f"warning: got missing_keys={missing_keys}, unexpected_keys={unexpected_keys} when loading SSL4EO-S12 state dict"
+                )
 
-    def get_backbone_channels(self):
+    def get_backbone_channels(self) -> list[tuple[int, int]]:
         """Returns the output channels of this model when used as a backbone.
 
         The output channels is a list of (downsample_factor, depth) that corresponds
@@ -65,28 +70,33 @@ class Ssl4eoS12(torch.nn.Module):
         """
         if self.arch == "resnet50":
             all_out_channels = [
-                [old lines 68-71 removed; content not captured in this diff view]
+                (4, 256),
+                (8, 512),
+                (16, 1024),
+                (32, 2048),
             ]
         return [all_out_channels[idx] for idx in self.output_layers]
 
     def forward(
-        self,
-        [old line 77 removed; content not captured in this diff view]
+        self,
+        context: ModelContext,
+    ) -> FeatureMaps:
         """Compute outputs from the backbone.
 
         If output_layers is set, then the outputs are multi-scale feature maps;
         otherwise, the model is being used for classification so the outputs are class
         probabilities and the loss.
 
-        [old lines 84-85 removed; content not captured in this diff view]
-            process.
-        [old line 87 removed; content not captured in this diff view]
+        Args:
+            context: the model context. Input dicts must include "image" key containing
+                the images to process.
+
+        Returns:
+            feature maps computed by the pre-trained model.
         """
-        x = torch.stack(
+        x = torch.stack(
+            [inp["image"].single_ts_to_chw_tensor() for inp in context.inputs], dim=0
+        )
         x = self.model.conv1(x)
         x = self.model.bn1(x)
         x = self.model.relu(x)
@@ -97,4 +107,4 @@ class Ssl4eoS12(torch.nn.Module):
         layer3 = self.model.layer3(layer2)
         layer4 = self.model.layer4(layer3)
         all_features = [layer1, layer2, layer3, layer4]
-        return [all_features[idx] for idx in self.output_layers]
+        return FeatureMaps([all_features[idx] for idx in self.output_layers])
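Note: a minimal usage sketch of the updated constructor, assuming the import path matches the file location above and that the constructor otherwise builds the torchvision ResNet-50 as before. With backbone_ckpt_path now typed str | None, the backbone can be built without a checkpoint, and get_backbone_channels() reports the (downsample_factor, depth) pair for each selected output layer. This is an illustration, not part of the diff.

    from rslearn.models.ssl4eo_s12 import Ssl4eoS12

    # No checkpoint: the ResNet-50 backbone is left randomly initialized.
    model = Ssl4eoS12(backbone_ckpt_path=None, arch="resnet50", output_layers=[1, 3])
    print(model.get_backbone_channels())  # expected: [(8, 512), (16, 1024)]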
rslearn/models/swin.py CHANGED

@@ -1,7 +1,5 @@
 """Swin Transformer."""
 
-from typing import Any
-
 import torch
 import torchvision
 from torchvision.models.swin_transformer import (
@@ -13,8 +11,12 @@ from torchvision.models.swin_transformer import (
     Swin_V2_T_Weights,
 )
 
+from rslearn.train.model_context import ModelContext
+
+from .component import FeatureExtractor, FeatureMaps, FeatureVector
 
-class Swin(torch.nn.Module):
+
+class Swin(FeatureExtractor):
     """A Swin Transformer model.
 
     It can either be used stand-alone for classification, or as a feature extractor in
@@ -28,15 +30,18 @@ class Swin(torch.nn.Module):
         input_channels: int = 3,
         output_layers: list[int] | None = None,
         num_outputs: int = 1000,
-    ):
+    ) -> None:
         """Instantiate a new Swin instance.
 
         Args:
             arch: the architecture, e.g. "swin_v2_b" (default) or "swin_t"
             pretrained: set True to use ImageNet pre-trained weights
-            input_channels: number of input channels (default 3)
+            input_channels: number of input channels (default 3). If not 3, the first
+                layer is updated and will be randomly initialized even if pretrained is
+                set.
             output_layers: list of layers to output, default use as classification
-                model. For feature extraction, [1, 3, 5, 7] is
+                model (output FeatureVector). For feature extraction, [1, 3, 5, 7] is
+                recommended.
             num_outputs: number of output logits, defaults to 1000 which matches the
                 pretrained models.
         """
@@ -89,7 +94,7 @@ class Swin(torch.nn.Module):
         if num_outputs != self.model.head.out_features:
             self.model.head = torch.nn.Linear(self.model.head.in_features, num_outputs)
 
-    def get_backbone_channels(self):
+    def get_backbone_channels(self) -> list[tuple[int, int]]:
         """Returns the output channels of this model when used as a backbone.
 
         The output channels is a list of (downsample_factor, depth) that corresponds
@@ -105,43 +110,50 @@ class Swin(torch.nn.Module):
 
         if self.arch in ["swin_b", "swin_v2_b"]:
             all_out_channels = [
-                [old lines 108-115 removed; content not captured in this diff view]
+                (4, 128),
+                (4, 128),
+                (4, 128),
+                (8, 256),
+                (8, 256),
+                (16, 512),
+                (16, 512),
+                (32, 1024),
+                (32, 1024),
             ]
         elif self.arch in ["swin_s", "swin_v2_s", "swin_t", "swin_v2_t"]:
             all_out_channels = [
-                [old lines 119-126 removed; content not captured in this diff view]
+                (4, 96),
+                (4, 96),
+                (8, 192),
+                (8, 192),
+                (16, 384),
+                (16, 384),
+                (32, 768),
+                (32, 768),
             ]
         return [all_out_channels[idx] for idx in self.output_layers]
 
     def forward(
-        self,
-        [old line 132 removed; content not captured in this diff view]
+        self,
+        context: ModelContext,
+    ) -> FeatureVector | FeatureMaps:
         """Compute outputs from the backbone.
 
         If output_layers is set, then the outputs are multi-scale feature maps;
         otherwise, the model is being used for classification so the outputs are class
         probabilities and the loss.
 
-        [old lines 139-140 removed; content not captured in this diff view]
-            process.
-        [old line 142 removed; content not captured in this diff view]
+        Args:
+            context: the model context. Input dicts must include "image" key containing
+                the image to process.
+
+        Returns:
+            a FeatureVector if the configured output_layers is None, or a FeatureMaps
+            otherwise containing one feature map per configured output layer.
         """
-        images = torch.stack(
+        images = torch.stack(
+            [inp["image"].single_ts_to_chw_tensor() for inp in context.inputs], dim=0
+        )
 
         if self.output_layers:
             layer_features = []
@@ -149,7 +161,7 @@ class Swin(torch.nn.Module):
             for layer in self.model.features:
                 x = layer(x)
                 layer_features.append(x.permute(0, 3, 1, 2))
-            return [layer_features[idx] for idx in self.output_layers]
+            return FeatureMaps([layer_features[idx] for idx in self.output_layers])
 
         else:
-            return self.model(images)
+            return FeatureVector(self.model(images))
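Note: a hedged sketch of the two output modes the updated Swin docstring describes. With output_layers set, the model acts as a backbone and forward() returns a FeatureMaps; without it, forward() returns a FeatureVector of class logits. The import path is assumed from the file location, and pretrained=False is used only to avoid a weight download.

    from rslearn.models.swin import Swin

    # Backbone mode with the recommended layers; get_backbone_channels() lists
    # the (downsample_factor, depth) pair for each selected layer.
    backbone = Swin(arch="swin_v2_b", pretrained=False, output_layers=[1, 3, 5, 7])
    print(backbone.get_backbone_channels())
    # expected: [(4, 128), (8, 256), (16, 512), (32, 1024)]

    # Classification mode: no output_layers, 10 output logits.
    classifier = Swin(arch="swin_v2_b", pretrained=False, num_outputs=10)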
rslearn/models/task_embedding.py ADDED

@@ -0,0 +1,250 @@
+"""Task embedding modules."""
+
+import math
+from typing import Any
+
+import torch
+from torch import nn
+
+
+class PositionalEncoding(nn.Module):
+    """Simple sinusoidal positional encoding for the task embedding. From torch docs."""
+
+    def __init__(self, d_model: int, dropout: float = 0.0, max_len: int = 1024):
+        """Initialize the positional encoding module.
+
+        Args:
+            d_model: The dimension of the model.
+            dropout: The dropout rate.
+            max_len: The maximum length of the sequence.
+        """
+        super().__init__()
+        self.dropout = nn.Dropout(p=dropout)
+
+        position = torch.arange(max_len).unsqueeze(1)
+        div_term = torch.exp(
+            torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model)
+        )
+        pe = torch.zeros(max_len, 1, d_model)
+        pe[:, 0, 0::2] = torch.sin(position * div_term)
+        pe[:, 0, 1::2] = torch.cos(position * div_term)
+        self.register_buffer("pe", pe)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Apply positional encoding to the input tensor.
+
+        Args:
+            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``
+        """
+        x = x + self.pe[: x.size(0)]
+        return self.dropout(x)
+
+
+class BaseTaskEmbedding(torch.nn.Module):
+    """Base class for task embedding modules."""
+
+    def __init__(self, encoder_embedding_size: int) -> None:
+        """Initialize the base task embedding module.
+
+        Args:
+            encoder_embedding_size: The size of the encoder embedding.
+        """
+        super().__init__()
+        self.encoder_embedding_size = encoder_embedding_size
+
+    def register_tasks(self, task_names: list[str]) -> None:
+        """Register the tasks.
+
+        This must happen post-init so that we can dynamically determine
+        the tasks to use, so it doesn't have to be specified in the config.
+
+        Args:
+            task_names: The names of the tasks.
+        """
+        raise NotImplementedError
+
+    def compute_embeds(
+        self,
+        features: list[torch.tensor],
+        inputs: list[dict[str, Any]],
+    ) -> torch.Tensor:
+        """Compute the task-specific embeddings.
+
+        Args:
+            features: The encoder features.
+            inputs: The inputs to the model.
+
+        Returns:
+            The task-specific embeddings.
+        """
+        raise NotImplementedError
+
+
+class TaskChannelEmbedding(BaseTaskEmbedding):
+    """Registers task-specific 'tokens', i.e. embeddings.
+
+    Each embedding is learned per-channel and copied over the full spatial dimensions.
+    Optionally, add a spatial sinusoidal positional embedding to the task embedding.
+    """
+
+    def __init__(
+        self,
+        encoder_embedding_size: int,
+        default_idx: int = 0,
+        add_spatial_embed: bool = False,
+    ) -> None:
+        """Initialize the task channel embedding module.
+
+        Args:
+            encoder_embedding_size: The size of the encoder embedding.
+            default_idx: The index of the default task, useful if loading a merged model.
+            add_spatial_embed: if true, add a spatial sinusoidal positional embedding to the task embedding
+        """
+        super().__init__(encoder_embedding_size)
+        self.default_idx = default_idx
+        self.add_spatial_embed = add_spatial_embed
+        if add_spatial_embed:
+            self.pos_embed = PositionalEncoding(encoder_embedding_size)
+
+    def register_tasks(self, task_names: list[str]) -> None:
+        """Register the tasks.
+
+        This must happen post-init so that we can dynamically determine
+        the tasks to use, so it doesn't have to be specified in the config.
+
+        Args:
+            task_names: The names of the tasks.
+        """
+        self.embed = torch.nn.Embedding(len(task_names), self.encoder_embedding_size)
+        self.target_to_embed_idx = {name: i for i, name in enumerate(task_names)}
+
+    def compute_embeds(
+        self,
+        features: list[torch.tensor],
+        inputs: list[dict[str, Any]],
+    ) -> torch.Tensor:
+        """Compute the task-specific embeddings.
+
+        Args:
+            inputs: The inputs to the model.
+            features: computed encoder features
+            device: The device to compute the embeddings on.
+
+        Returns:
+            The task-specific embeddings, shape (B, T, C), T = HW
+            The embeddings are repeated over the spatial dimensions, and optionally
+            a sinusoidal positional embedding is added.
+        """
+        try:
+            idx = [self.target_to_embed_idx[inp["dataset_source"]] for inp in inputs]
+        except KeyError:
+            idx = [self.default_idx] * len(inputs)
+        embeds = self.embed(torch.tensor(idx).to(features[0].device))
+        seq_len = features[0].shape[-1] * features[0].shape[-2]  # T = HW
+        embeds = embeds.unsqueeze(0).repeat(seq_len, 1, 1)  # T x B x C
+        if self.add_spatial_embed:
+            embeds = self.pos_embed(embeds)
+        embeds = torch.einsum("tbc->btc", embeds)  # B x T x C
+        return embeds
+
+    def forward(
+        self,
+        features: list[torch.tensor],
+        inputs: list[dict[str, Any]],
+        embeds: torch.Tensor | None = None,
+    ) -> list[torch.tensor]:
+        """Compute and apply task-specific embeddings to encoder features.
+
+        Optionally, add a spatial sinusoidal positional embedding to the task embedding.
+        Otherwise, the task embedding is repeated over the spatial dimensions.
+
+        Args:
+            features: The encoder features, a 1-list of B x C x H x W features.
+            inputs: The inputs to the model.
+            embeds: Already-computed task embeddings, if provided, skip the computation.
+
+        Returns:
+            The encoder features with the task-specific embeddings added.
+        """
+        height, width = features[0].shape[-2:]
+        assert all(f.shape[-2:] == (height, width) for f in features), (
+            "features must have the same spatial dimensions"
+        )
+        if embeds is None:
+            embeds = self.compute_embeds(features, inputs)  # B x HW x C
+        embeds = embeds.unflatten(dim=1, sizes=(height, width))  # B x H x W x C
+        for i in range(len(features)):
+            features[i] += torch.einsum("bhwc->bchw", embeds)  # B x C x H x W
+        return features
+
+
+class TaskMHAEmbedding(TaskChannelEmbedding):
+    """Multi-headed cross-attention over the spatial dimensions.
+
+    The task embedding is the query and the features are the key and value.
+    We copy the task embedding over the spatial dimensions, and optionally
+    add a sinusoidal positional embedding before the MHA layer.
+    """
+
+    def __init__(
+        self,
+        encoder_embedding_size: int,
+        num_heads: int,
+        default_idx: int = 0,
+        add_spatial_embed: bool = True,
+    ) -> None:
+        """Initialize the task MHA embedding module.
+
+        Args:
+            encoder_embedding_size: The size of the encoder embedding.
+            num_heads: The number of attention heads.
+            default_idx: The index of the default task, useful if loading a merged model.
+            add_spatial_embed: if true, add a spatial sinusoidal positional embedding to the task embedding
+        """
+        super().__init__(encoder_embedding_size, default_idx, add_spatial_embed)
+        self.mha = torch.nn.MultiheadAttention(
+            encoder_embedding_size, num_heads, batch_first=True
+        )
+
+    def register_tasks(self, task_names: list[str]) -> None:
+        """Register the tasks.
+
+        This must happen post-init so that we can dynamically determine
+        the tasks to use, so it doesn't have to be specified in the config.
+
+        Args:
+            task_names: The names of the tasks.
+        """
+        super().register_tasks(task_names)
+
+    def forward(
+        self,
+        features: list[torch.tensor],
+        inputs: list[dict[str, Any]],
+        embeds: torch.Tensor | None = None,
+    ) -> list[torch.tensor]:
+        """Compute and apply task-specific embeddings to encoder features.
+
+        Also apply the MHA layer across the spatial dimension, with the task embedding
+        as the query and the features as the key and value.
+
+        Args:
+            features: The encoder features, a 1-list of B x C x H x W features.
+            inputs: The inputs to the model.
+            embeds: Already-computed task embeddings, if provided, skip the computation.
+
+        Returns:
+            The encoder features with the task-specific embeddings added.
+        """
+        assert len(features) == 1, "TaskMHAEmbedding only supports one feature"
+        x = torch.flatten(features[0], start_dim=2)  # B x C x T, T = HW
+        if embeds is None:
+            embeds = self.compute_embeds(features, inputs)  # B x T x C
+        out = self.mha(
+            embeds,  # B x T x C
+            torch.einsum("bct->btc", x),
+            torch.einsum("bct->btc", x),
+        )[0]  # B x T x C
+        out = torch.einsum("btc->bct", out)
+        out = out.view(*features[0].shape)  # B x C x H x W
+        return [out]
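Note: a hedged usage sketch for the new TaskChannelEmbedding. Tasks are registered after construction, and forward() adds a learned per-task embedding (broadcast over H x W) to each B x C x H x W feature map. The task names, batch contents, and shapes below are illustrative only; the import path is assumed from the file location.

    import torch

    from rslearn.models.task_embedding import TaskChannelEmbedding

    emb = TaskChannelEmbedding(encoder_embedding_size=64, add_spatial_embed=True)
    emb.register_tasks(["task_a", "task_b"])  # hypothetical task names

    # A 1-list of B x C x H x W encoder features and the matching per-example inputs.
    features = [torch.zeros(2, 64, 8, 8)]
    inputs = [{"dataset_source": "task_a"}, {"dataset_source": "task_b"}]

    out = emb(features, inputs)
    print(out[0].shape)  # torch.Size([2, 64, 8, 8]); embeddings added per channel

If any input lacks the "dataset_source" key (or names an unregistered task), compute_embeds falls back to default_idx for the whole batch.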